Index: test/CodeGen/X86/aes-schedule.ll =================================================================== --- test/CodeGen/X86/aes-schedule.ll +++ test/CodeGen/X86/aes-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -34,6 +35,12 @@ ; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesdec: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesdec: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -83,6 +90,12 @@ ; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesdeclast: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesdeclast: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -132,6 +145,12 @@ ; HASWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesenc: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesenc: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -181,6 +200,12 @@ ; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesenclast: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesenclast: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -234,6 +259,13 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesimc: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesimc: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00] @@ -291,6 +323,13 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aeskeygenassist: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00] +; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [28:7.00] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aeskeygenassist: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00] Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -27,6 +28,12 @@ ; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -75,6 +82,12 @@ ; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -123,6 +136,12 @@ ; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -172,6 +191,12 @@ ; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -224,6 +249,13 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -286,6 +318,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -348,6 +387,13 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -408,6 +454,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -468,6 +521,13 @@ ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] @@ -521,6 +581,12 @@ ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] +; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] @@ -569,6 +635,12 @@ ; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] @@ -618,6 +690,12 @@ ; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] @@ -664,6 +742,11 @@ ; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastf128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] @@ -704,6 +787,11 @@ ; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastsd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] @@ -745,6 +833,11 @@ ; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] @@ -786,6 +879,11 @@ ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] @@ -833,6 +931,13 @@ ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmppd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33] @@ -894,6 +999,13 @@ ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33] @@ -955,6 +1067,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] @@ -1013,6 +1132,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] @@ -1069,6 +1195,13 @@ ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] @@ -1125,6 +1258,13 @@ ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] @@ -1181,6 +1321,13 @@ ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] @@ -1234,6 +1381,12 @@ ; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00] @@ -1282,6 +1435,12 @@ ; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00] @@ -1330,6 +1489,12 @@ ; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] @@ -1382,6 +1547,13 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_extractf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_extractf128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] @@ -1433,6 +1605,12 @@ ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] @@ -1482,6 +1660,12 @@ ; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] @@ -1531,6 +1715,12 @@ ; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] @@ -1580,6 +1770,12 @@ ; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] @@ -1632,6 +1828,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_insertf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_insertf128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -1684,6 +1887,11 @@ ; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lddqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] @@ -1730,6 +1938,13 @@ ; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] @@ -1786,6 +2001,13 @@ ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] @@ -1842,6 +2064,13 @@ ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] @@ -1898,6 +2127,13 @@ ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] @@ -1951,6 +2187,12 @@ ; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2000,6 +2242,12 @@ ; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2049,6 +2297,12 @@ ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2098,6 +2352,12 @@ ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2150,6 +2410,13 @@ ; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movapd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] @@ -2205,6 +2472,13 @@ ; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movaps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] @@ -2260,6 +2534,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movddup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] @@ -2313,6 +2594,12 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] @@ -2359,6 +2646,12 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] @@ -2405,6 +2698,12 @@ ; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] @@ -2452,6 +2751,12 @@ ; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] @@ -2502,6 +2807,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movshdup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] +; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] @@ -2558,6 +2870,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsldup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] +; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] @@ -2616,6 +2935,13 @@ ; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movupd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] @@ -2673,6 +2999,13 @@ ; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movups: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] @@ -2725,6 +3058,12 @@ ; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -2773,6 +3112,12 @@ ; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -2824,6 +3169,13 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: orpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: orpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -2884,6 +3236,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -2944,6 +3303,13 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_perm2f128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_perm2f128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] @@ -3000,6 +3366,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] @@ -3056,6 +3429,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] @@ -3112,6 +3492,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] @@ -3168,6 +3555,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] @@ -3221,6 +3615,12 @@ ; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3270,6 +3670,12 @@ ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -3319,6 +3725,12 @@ ; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3368,6 +3780,12 @@ ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -3420,6 +3838,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00] +; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] @@ -3477,6 +3902,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] @@ -3534,6 +3966,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] @@ -3591,6 +4030,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00] +; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] @@ -3648,6 +4094,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] @@ -3701,6 +4154,12 @@ ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] @@ -3752,6 +4211,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00] +; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00] @@ -3809,6 +4275,13 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00] +; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00] @@ -3863,6 +4336,12 @@ ; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -3911,6 +4390,12 @@ ; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] @@ -3968,6 +4453,15 @@ ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] @@ -4042,6 +4536,16 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] @@ -4116,6 +4620,15 @@ ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] @@ -4190,6 +4703,16 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] @@ -4258,6 +4781,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] @@ -4311,6 +4841,12 @@ ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] @@ -4362,6 +4898,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] @@ -4415,6 +4958,12 @@ ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] @@ -4466,6 +5015,13 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -4526,6 +5082,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -4580,6 +5143,11 @@ ; HASWELL-NEXT: vzeroall # sched: [16:16.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_zeroall: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vzeroall # sched: [16:16.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_zeroall: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vzeroall # sched: [16:4.00] @@ -4620,6 +5188,11 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_zeroupper: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_zeroupper: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] Index: test/CodeGen/X86/avx2-schedule.ll =================================================================== --- test/CodeGen/X86/avx2-schedule.ll +++ test/CodeGen/X86/avx2-schedule.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -18,6 +19,12 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcasti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcasti128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] @@ -54,6 +61,12 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastsd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] @@ -89,6 +102,12 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] @@ -124,6 +143,12 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] @@ -165,6 +190,15 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_extracti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_extracti128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] @@ -210,6 +244,11 @@ ; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -240,6 +279,11 @@ ; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] @@ -270,6 +314,11 @@ ; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -300,6 +349,11 @@ ; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] @@ -330,6 +384,11 @@ ; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -360,6 +419,11 @@ ; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] @@ -390,6 +454,11 @@ ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -422,6 +491,12 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] @@ -459,6 +534,13 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_inserti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_inserti128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -499,6 +581,11 @@ ; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] @@ -531,6 +618,12 @@ ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mpsadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] @@ -571,6 +664,13 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] @@ -614,6 +714,13 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] @@ -657,6 +764,13 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] @@ -698,6 +812,12 @@ ; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -736,6 +856,12 @@ ; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -774,6 +900,12 @@ ; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packusdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -812,6 +944,12 @@ ; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -850,6 +988,12 @@ ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -886,6 +1030,12 @@ ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -922,6 +1072,12 @@ ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -958,6 +1114,12 @@ ; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -995,6 +1157,12 @@ ; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1032,6 +1200,12 @@ ; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1069,6 +1243,12 @@ ; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1106,6 +1286,12 @@ ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -1142,6 +1328,12 @@ ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] @@ -1180,6 +1372,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -1222,6 +1421,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -1264,6 +1470,12 @@ ; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1310,6 +1522,12 @@ ; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1358,6 +1576,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] @@ -1400,6 +1625,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] +; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] @@ -1440,6 +1672,12 @@ ; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendvb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] @@ -1477,6 +1715,12 @@ ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] @@ -1515,6 +1759,13 @@ ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] @@ -1557,6 +1808,13 @@ ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastb_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastb_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] @@ -1599,6 +1857,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] @@ -1640,6 +1905,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] @@ -1681,6 +1953,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] @@ -1722,6 +2001,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] @@ -1763,6 +2049,13 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] @@ -1805,6 +2098,13 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastw_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastw_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] @@ -1845,6 +2145,12 @@ ; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1885,6 +2191,12 @@ ; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1925,6 +2237,12 @@ ; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1965,6 +2283,12 @@ ; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2005,6 +2329,12 @@ ; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2045,6 +2375,12 @@ ; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2085,6 +2421,12 @@ ; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -2125,6 +2467,12 @@ ; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2167,6 +2515,13 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_perm2i128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_perm2i128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] @@ -2209,6 +2564,13 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -2252,6 +2614,13 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] @@ -2294,6 +2663,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -2337,6 +2713,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] @@ -2375,6 +2758,11 @@ ; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -2405,6 +2793,11 @@ ; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] @@ -2435,6 +2828,11 @@ ; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -2465,6 +2863,11 @@ ; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] @@ -2495,6 +2898,11 @@ ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -2527,6 +2935,12 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] @@ -2560,6 +2974,11 @@ ; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] @@ -2590,6 +3009,11 @@ ; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] @@ -2622,6 +3046,12 @@ ; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2659,6 +3089,12 @@ ; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2696,6 +3132,12 @@ ; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2733,6 +3175,12 @@ ; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2770,6 +3218,12 @@ ; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2807,6 +3261,12 @@ ; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -2844,6 +3304,12 @@ ; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2882,6 +3348,12 @@ ; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -2922,6 +3394,13 @@ ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] @@ -2964,6 +3443,13 @@ ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] @@ -3006,6 +3492,13 @@ ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] @@ -3048,6 +3541,13 @@ ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] @@ -3088,6 +3588,12 @@ ; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3125,6 +3631,12 @@ ; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3162,6 +3674,12 @@ ; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3199,6 +3717,12 @@ ; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3236,6 +3760,12 @@ ; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3273,6 +3803,12 @@ ; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3310,6 +3846,12 @@ ; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3347,6 +3889,12 @@ ; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3384,6 +3932,12 @@ ; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3421,6 +3975,12 @@ ; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3458,6 +4018,12 @@ ; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3495,6 +4061,12 @@ ; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3532,6 +4104,12 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] @@ -3569,6 +4147,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] @@ -3613,6 +4198,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] @@ -3657,6 +4249,13 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] @@ -3699,6 +4298,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] @@ -3741,6 +4347,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] @@ -3783,6 +4396,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] @@ -3827,6 +4447,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] @@ -3871,6 +4498,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] @@ -3915,6 +4549,13 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] @@ -3957,6 +4598,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] @@ -3999,6 +4647,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] @@ -4041,6 +4696,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] @@ -4083,6 +4745,12 @@ ; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4121,6 +4789,12 @@ ; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4158,6 +4832,12 @@ ; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4195,6 +4875,12 @@ ; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4232,6 +4918,12 @@ ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] +; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67] @@ -4268,6 +4960,12 @@ ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4304,6 +5002,12 @@ ; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] @@ -4344,6 +5048,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -4384,6 +5095,12 @@ ; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -4422,6 +5139,12 @@ ; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -4461,6 +5184,13 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] @@ -4503,6 +5233,13 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] @@ -4545,6 +5282,13 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshuflw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] @@ -4585,6 +5329,12 @@ ; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4622,6 +5372,12 @@ ; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4659,6 +5415,12 @@ ; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4698,6 +5460,13 @@ ; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -4737,6 +5506,11 @@ ; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] @@ -4770,6 +5544,13 @@ ; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -4811,6 +5592,12 @@ ; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4848,6 +5635,12 @@ ; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4885,6 +5678,12 @@ ; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4922,6 +5721,12 @@ ; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4961,6 +5766,13 @@ ; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5004,6 +5816,13 @@ ; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5045,6 +5864,12 @@ ; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psravd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psravd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5082,6 +5907,12 @@ ; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psravd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psravd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5121,6 +5952,13 @@ ; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5164,6 +6002,13 @@ ; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5203,6 +6048,11 @@ ; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] @@ -5236,6 +6086,13 @@ ; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5277,6 +6134,12 @@ ; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5314,6 +6177,12 @@ ; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5351,6 +6220,12 @@ ; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5388,6 +6263,12 @@ ; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5427,6 +6308,13 @@ ; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -5468,6 +6356,12 @@ ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -5504,6 +6398,12 @@ ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -5540,6 +6440,12 @@ ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -5576,6 +6482,12 @@ ; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5613,6 +6525,12 @@ ; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5650,6 +6568,12 @@ ; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5687,6 +6611,12 @@ ; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -5724,6 +6654,12 @@ ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -5760,6 +6696,12 @@ ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] @@ -5800,6 +6742,14 @@ ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] @@ -5845,6 +6795,13 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] @@ -5885,6 +6842,12 @@ ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] @@ -5921,6 +6884,12 @@ ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] @@ -5961,6 +6930,14 @@ ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] @@ -6006,6 +6983,13 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] @@ -6046,6 +7030,12 @@ ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] @@ -6084,6 +7074,13 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] Index: test/CodeGen/X86/bmi-schedule.ll =================================================================== --- test/CodeGen/X86/bmi-schedule.ll +++ test/CodeGen/X86/bmi-schedule.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -25,6 +26,15 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: notl %edi # sched: [1:0.25] +; BROADWELL-NEXT: andw (%rdx), %di # sched: [1:0.50] +; BROADWELL-NEXT: addl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50] @@ -74,6 +84,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] @@ -117,6 +134,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] @@ -160,6 +184,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bextr_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [2:0.50] +; BROADWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bextr_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] @@ -203,6 +234,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bextr_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [2:0.50] +; BROADWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bextr_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50] @@ -246,6 +284,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsi_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsil %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsi_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] @@ -290,6 +335,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsi_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsi_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] @@ -334,6 +386,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsmsk_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsmskl (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsmsk_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] @@ -378,6 +437,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsmsk_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsmskq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsmsk_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] @@ -422,6 +488,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsr_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsr_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] @@ -466,6 +539,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsr_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsr_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] @@ -512,6 +592,14 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] @@ -558,6 +646,13 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] @@ -601,6 +696,13 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] Index: test/CodeGen/X86/bmi2-schedule.ll =================================================================== --- test/CodeGen/X86/bmi2-schedule.ll +++ test/CodeGen/X86/bmi2-schedule.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { @@ -20,6 +21,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bzhi_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bzhi_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] @@ -63,6 +71,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bzhi_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bzhi_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] @@ -112,6 +127,15 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movq %rdx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25] +; BROADWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulx_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movq %rdx, %rax # sched: [1:0.25] @@ -167,6 +191,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pdep_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pdep_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] @@ -210,6 +241,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pdep_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pdep_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] @@ -253,6 +291,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pext_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pext_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] @@ -296,6 +341,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pext_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pext_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] @@ -339,6 +391,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rorx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rorx_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] @@ -385,6 +444,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rorx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rorx_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] @@ -431,6 +497,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sarx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sarx_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] @@ -473,6 +546,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sarx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sarx_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] @@ -515,6 +595,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shlx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shlx_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] @@ -557,6 +644,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shlx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shlx_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] @@ -599,6 +693,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shrx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shrx_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] @@ -641,6 +742,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shrx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shrx_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] Index: test/CodeGen/X86/f16c-schedule.ll =================================================================== --- test/CodeGen/X86/f16c-schedule.ll +++ test/CodeGen/X86/f16c-schedule.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -28,6 +29,13 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtph2ps_128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtph2ps_128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50] @@ -78,6 +86,13 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtph2ps_256: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtph2ps_256: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50] @@ -125,6 +140,12 @@ ; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtps2ph_128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtps2ph_128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00] @@ -172,6 +193,13 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtps2ph_256: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtps2ph_256: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00] Index: test/CodeGen/X86/fma-schedule.ll =================================================================== --- test/CodeGen/X86/fma-schedule.ll +++ test/CodeGen/X86/fma-schedule.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX @@ -27,6 +28,12 @@ ; HASWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -69,6 +76,12 @@ ; HASWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -111,6 +124,12 @@ ; HASWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -153,6 +172,12 @@ ; HASWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -195,6 +220,12 @@ ; HASWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -237,6 +268,12 @@ ; HASWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -291,6 +328,12 @@ ; HASWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -333,6 +376,12 @@ ; HASWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -375,6 +424,12 @@ ; HASWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -417,6 +472,12 @@ ; HASWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -471,6 +532,12 @@ ; HASWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -513,6 +580,12 @@ ; HASWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -555,6 +628,12 @@ ; HASWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -597,6 +676,12 @@ ; HASWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -651,6 +736,12 @@ ; HASWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -693,6 +784,12 @@ ; HASWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -735,6 +832,12 @@ ; HASWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -777,6 +880,12 @@ ; HASWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -819,6 +928,12 @@ ; HASWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -861,6 +976,12 @@ ; HASWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -915,6 +1036,12 @@ ; HASWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -957,6 +1084,12 @@ ; HASWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -999,6 +1132,12 @@ ; HASWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1041,6 +1180,12 @@ ; HASWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -1083,6 +1228,12 @@ ; HASWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1125,6 +1276,12 @@ ; HASWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1179,6 +1336,12 @@ ; HASWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1221,6 +1384,12 @@ ; HASWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -1263,6 +1432,12 @@ ; HASWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1305,6 +1480,12 @@ ; HASWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] @@ -1347,6 +1528,12 @@ ; HASWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] @@ -1389,6 +1576,12 @@ ; HASWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] Index: test/CodeGen/X86/lea32-schedule.ll =================================================================== --- test/CodeGen/X86/lea32-schedule.ll +++ test/CodeGen/X86/lea32-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -47,6 +48,12 @@ ; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI @@ -105,6 +112,12 @@ ; HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI @@ -169,6 +182,13 @@ ; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -238,6 +258,14 @@ ; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $16, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -312,6 +340,15 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-4096, %eax # imm = 0xF000 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -376,6 +413,12 @@ ; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI @@ -437,6 +480,13 @@ ; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-32, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI @@ -503,6 +553,14 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $10000, %eax # imm = 0x2710 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI @@ -569,6 +627,13 @@ ; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -639,6 +704,14 @@ ; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $96, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -714,6 +787,15 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI Index: test/CodeGen/X86/lea64-schedule.ll =================================================================== --- test/CodeGen/X86/lea64-schedule.ll +++ test/CodeGen/X86/lea64-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -42,6 +43,11 @@ ; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] @@ -92,6 +98,11 @@ ; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] @@ -143,6 +154,11 @@ ; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] @@ -196,6 +212,12 @@ ; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $16, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] @@ -254,6 +276,13 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-4096, %rax # imm = 0xF000 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] @@ -307,6 +336,11 @@ ; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] @@ -360,6 +394,12 @@ ; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-32, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] @@ -418,6 +458,13 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $10000, %rax # imm = 0x2710 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] @@ -471,6 +518,11 @@ ; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] @@ -525,6 +577,12 @@ ; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $96, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] @@ -584,6 +642,13 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] Index: test/CodeGen/X86/lzcnt-schedule.ll =================================================================== --- test/CodeGen/X86/lzcnt-schedule.ll +++ test/CodeGen/X86/lzcnt-schedule.ll @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) { ; GENERIC-LABEL: test_ctlz_i16: @@ -23,6 +24,14 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] @@ -69,6 +78,13 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] @@ -112,6 +128,13 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] Index: test/CodeGen/X86/mmx-schedule.ll =================================================================== --- test/CodeGen/X86/mmx-schedule.ll +++ test/CodeGen/X86/mmx-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -51,13 +52,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2pi: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [5:1.00] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2pi: ; SKX: # BB#0: @@ -127,12 +136,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpi2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpi2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [5:1.00] +; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpi2pd: ; SKX: # BB#0: @@ -198,12 +214,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpi2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpi2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [4:1.00] +; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpi2ps: ; SKX: # BB#0: @@ -274,13 +297,21 @@ ; HASWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2pi: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 # sched: [4:0.50] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2pi: ; SKX: # BB#0: @@ -355,13 +386,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttpd2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttpd2pi: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [5:1.00] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttpd2pi: ; SKX: # BB#0: @@ -436,13 +475,21 @@ ; HASWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttps2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttps2pi: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [4:0.50] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttps2pi: ; SKX: # BB#0: @@ -502,10 +549,15 @@ ; HASWELL-NEXT: emms # sched: [31:10.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_emms: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: emms # sched: [31:10.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_emms: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: emms # sched: [10:4.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_emms: ; SKX: # BB#0: @@ -552,10 +604,15 @@ ; HASWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovq: ; SKX: # BB#0: @@ -647,19 +704,33 @@ ; HASWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm1, %ecx # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] +; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; SKYLAKE-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; SKYLAKE-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:0.50] +; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] ; SKYLAKE-NEXT: movd %mm1, %ecx # sched: [2:1.00] ; SKYLAKE-NEXT: movd %mm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movd: ; SKX: # BB#0: @@ -753,12 +824,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movdq2q: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] +; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movdq2q: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movdq2q: ; SKX: # BB#0: @@ -813,10 +891,15 @@ ; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntq: ; SKX: # BB#0: @@ -875,12 +958,19 @@ ; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] +; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] ; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movq: ; SKX: # BB#0: @@ -936,10 +1026,15 @@ ; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movq2dq: ; SKX: # BB#0: @@ -996,12 +1091,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsb: ; SKX: # BB#0: @@ -1067,12 +1169,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsd: ; SKX: # BB#0: @@ -1138,12 +1247,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsw: ; SKX: # BB#0: @@ -1209,12 +1325,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packssdw: ; SKX: # BB#0: @@ -1280,12 +1403,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packsswb: ; SKX: # BB#0: @@ -1351,12 +1481,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packuswb: ; SKX: # BB#0: @@ -1422,12 +1559,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddb: ; SKX: # BB#0: @@ -1493,12 +1637,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddd: ; SKX: # BB#0: @@ -1564,12 +1715,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddq: ; SKX: # BB#0: @@ -1635,12 +1793,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsb: ; SKX: # BB#0: @@ -1706,12 +1871,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsw: ; SKX: # BB#0: @@ -1777,12 +1949,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusb: ; SKX: # BB#0: @@ -1848,12 +2027,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusw: ; SKX: # BB#0: @@ -1919,12 +2105,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddw: ; SKX: # BB#0: @@ -1990,12 +2183,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_palignr: ; SKX: # BB#0: @@ -2061,12 +2261,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pand: ; SKX: # BB#0: @@ -2132,12 +2339,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pandn: ; SKX: # BB#0: @@ -2203,12 +2417,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgb: ; SKX: # BB#0: @@ -2274,12 +2495,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgw: ; SKX: # BB#0: @@ -2345,12 +2573,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # BB#0: @@ -2416,12 +2651,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # BB#0: @@ -2487,12 +2729,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # BB#0: @@ -2558,12 +2807,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # BB#0: @@ -2629,12 +2885,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtd: ; SKX: # BB#0: @@ -2700,12 +2963,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # BB#0: @@ -2761,10 +3031,15 @@ ; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrw: ; SKX: # BB#0: @@ -2821,12 +3096,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddd: ; SKX: # BB#0: @@ -2892,12 +3174,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddsw: ; SKX: # BB#0: @@ -2963,12 +3252,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddw: ; SKX: # BB#0: @@ -3034,12 +3330,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubd: ; SKX: # BB#0: @@ -3105,12 +3408,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubsw: ; SKX: # BB#0: @@ -3176,12 +3486,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubw: ; SKX: # BB#0: @@ -3252,13 +3569,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [4:0.50] +; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [1:0.50] +; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pinsrw: ; SKX: # BB#0: @@ -3328,12 +3653,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddwd: ; SKX: # BB#0: @@ -3399,12 +3731,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # BB#0: @@ -3470,12 +3809,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsw: ; SKX: # BB#0: @@ -3541,12 +3887,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxub: ; SKX: # BB#0: @@ -3612,12 +3965,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsw: ; SKX: # BB#0: @@ -3683,12 +4043,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminub: ; SKX: # BB#0: @@ -3744,10 +4111,15 @@ ; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovmskb: ; SKX: # BB#0: @@ -3804,12 +4176,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # BB#0: @@ -3875,12 +4254,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhw: ; SKX: # BB#0: @@ -3946,12 +4332,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhuw: ; SKX: # BB#0: @@ -4017,12 +4410,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmullw: ; SKX: # BB#0: @@ -4088,12 +4488,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuludq: ; SKX: # BB#0: @@ -4159,12 +4566,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_por: ; SKX: # BB#0: @@ -4230,12 +4644,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] -; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [3:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psadbw: ; SKX: # BB#0: @@ -4301,12 +4722,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufb: ; SKX: # BB#0: @@ -4372,12 +4800,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufw: ; SKX: # BB#0: @@ -4443,12 +4878,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignb: ; SKX: # BB#0: @@ -4514,12 +4956,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignd: ; SKX: # BB#0: @@ -4585,12 +5034,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignw: ; SKX: # BB#0: @@ -4661,13 +5117,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pslld: ; SKX: # BB#0: @@ -4743,13 +5207,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllq: ; SKX: # BB#0: @@ -4825,13 +5297,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllw: ; SKX: # BB#0: @@ -4907,13 +5387,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrad: ; SKX: # BB#0: @@ -4989,13 +5477,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psraw: ; SKX: # BB#0: @@ -5071,13 +5567,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrld: ; SKX: # BB#0: @@ -5153,13 +5657,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlq: ; SKX: # BB#0: @@ -5235,13 +5747,21 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlw: ; SKX: # BB#0: @@ -5312,12 +5832,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubb: ; SKX: # BB#0: @@ -5383,12 +5910,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubd: ; SKX: # BB#0: @@ -5454,12 +5988,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubq: ; SKX: # BB#0: @@ -5525,12 +6066,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsb: ; SKX: # BB#0: @@ -5596,12 +6144,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsw: ; SKX: # BB#0: @@ -5667,12 +6222,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusb: ; SKX: # BB#0: @@ -5738,12 +6300,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusw: ; SKX: # BB#0: @@ -5809,12 +6378,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubw: ; SKX: # BB#0: @@ -5880,12 +6456,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhbw: ; SKX: # BB#0: @@ -5951,12 +6534,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhdq: ; SKX: # BB#0: @@ -6022,12 +6612,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhwd: ; SKX: # BB#0: @@ -6093,12 +6690,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklbw: ; SKX: # BB#0: @@ -6164,12 +6768,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckldq: ; SKX: # BB#0: @@ -6235,12 +6846,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklwd: ; SKX: # BB#0: @@ -6306,12 +6924,19 @@ ; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pxor: ; SKX: # BB#0: Index: test/CodeGen/X86/movbe-schedule.ll =================================================================== --- test/CodeGen/X86/movbe-schedule.ll +++ test/CodeGen/X86/movbe-schedule.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -37,6 +38,12 @@ ; HASWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [1:0.50] +; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movbew (%rdi), %ax # sched: [6:0.50] @@ -91,6 +98,12 @@ ; HASWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movbel (%rdi), %eax # sched: [6:0.50] @@ -145,6 +158,12 @@ ; HASWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] Index: test/CodeGen/X86/popcnt-schedule.ll =================================================================== --- test/CodeGen/X86/popcnt-schedule.ll +++ test/CodeGen/X86/popcnt-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -43,6 +44,14 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctpop_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] @@ -103,6 +112,13 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctpop_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] @@ -160,6 +176,13 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctpop_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -41,6 +42,12 @@ ; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -101,6 +108,12 @@ ; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -165,6 +178,12 @@ ; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -233,6 +252,12 @@ ; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -304,6 +329,13 @@ ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33] @@ -372,6 +404,12 @@ ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -477,6 +515,20 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_comiss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_comiss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -576,6 +628,13 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -646,6 +705,13 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2ssq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ssq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] @@ -716,6 +782,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] @@ -789,6 +862,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] @@ -862,6 +942,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttss2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttss2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] @@ -932,6 +1019,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttss2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttss2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] @@ -997,6 +1091,12 @@ ; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] @@ -1057,6 +1157,12 @@ ; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] @@ -1117,6 +1223,12 @@ ; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ldmxcsr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ldmxcsr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] @@ -1179,6 +1291,12 @@ ; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -1240,6 +1358,12 @@ ; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -1301,6 +1425,12 @@ ; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -1362,6 +1492,12 @@ ; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -1428,6 +1564,13 @@ ; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movaps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] @@ -1495,6 +1638,11 @@ ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhlps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhlps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] @@ -1558,6 +1706,13 @@ ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1628,6 +1783,12 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -1692,6 +1853,13 @@ ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1757,6 +1925,11 @@ ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] @@ -1813,6 +1986,11 @@ ; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] @@ -1872,6 +2050,13 @@ ; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movss_mem: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movss_mem: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] @@ -1937,6 +2122,11 @@ ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movss_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movss_reg: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] @@ -1996,6 +2186,13 @@ ; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movups: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -2060,6 +2257,12 @@ ; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -2120,6 +2323,12 @@ ; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -2184,6 +2393,12 @@ ; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -2249,6 +2464,11 @@ ; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_prefetchnta: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_prefetchnta: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] @@ -2311,6 +2531,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] @@ -2389,6 +2616,14 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2468,6 +2703,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] @@ -2546,6 +2788,14 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2619,6 +2869,11 @@ ; HASWELL-NEXT: sfence # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sfence # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sfence: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: sfence # sched: [2:0.33] @@ -2678,6 +2933,12 @@ ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2744,6 +3005,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] @@ -2822,6 +3090,14 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] @@ -2892,6 +3168,12 @@ ; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_stmxcsr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_stmxcsr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] @@ -2954,6 +3236,12 @@ ; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -3014,6 +3302,12 @@ ; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -3114,6 +3408,20 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ucomiss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ucomiss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -3212,6 +3520,12 @@ ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3276,6 +3590,12 @@ ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3340,6 +3660,12 @@ ; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -41,6 +42,12 @@ ; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -101,6 +108,12 @@ ; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -166,6 +179,13 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -240,6 +260,13 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -316,6 +343,13 @@ ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmppd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] @@ -384,6 +418,12 @@ ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -489,6 +529,20 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_comisd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_comisd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -588,6 +642,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -661,6 +722,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] @@ -732,6 +800,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -804,6 +879,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] @@ -876,6 +958,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -948,6 +1037,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -1020,6 +1116,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1093,6 +1196,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1172,6 +1282,14 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1246,6 +1364,13 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1316,6 +1441,13 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2sdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1394,6 +1526,14 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1469,6 +1609,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -1542,6 +1689,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -1612,6 +1766,13 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttsd2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1682,6 +1843,13 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttsd2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1747,6 +1915,12 @@ ; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] @@ -1807,6 +1981,12 @@ ; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] @@ -1868,6 +2048,11 @@ ; HASWELL-NEXT: lfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lfence # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lfence: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: lfence # sched: [2:0.50] @@ -1924,6 +2109,11 @@ ; HASWELL-NEXT: mfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: mfence # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mfence: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: mfence # sched: [3:0.50] @@ -1978,6 +2168,11 @@ ; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovdqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovdqu: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [2:1.00] @@ -2033,6 +2228,12 @@ ; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -2094,6 +2295,12 @@ ; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -2155,6 +2362,12 @@ ; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -2216,6 +2429,12 @@ ; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -2282,6 +2501,13 @@ ; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movapd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2351,6 +2577,13 @@ ; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movdqa: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2420,6 +2653,13 @@ ; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movdqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movdqu: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2504,6 +2744,16 @@ ; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] +; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2605,6 +2855,16 @@ ; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movd_64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movd_64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2691,6 +2951,13 @@ ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -2763,6 +3030,13 @@ ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -2827,6 +3101,11 @@ ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] @@ -2884,6 +3163,12 @@ ; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] @@ -2943,6 +3228,12 @@ ; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] @@ -3007,6 +3298,13 @@ ; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq_mem: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq_mem: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3075,6 +3373,12 @@ ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq_reg: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] @@ -3139,6 +3443,13 @@ ; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsd_mem: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsd_mem: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3205,6 +3516,11 @@ ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsd_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] @@ -3264,6 +3580,13 @@ ; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movupd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3328,6 +3651,12 @@ ; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -3388,6 +3717,12 @@ ; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -3453,6 +3788,13 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3526,6 +3868,12 @@ ; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3592,6 +3940,12 @@ ; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3658,6 +4012,12 @@ ; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3724,6 +4084,12 @@ ; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3788,6 +4154,12 @@ ; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3848,6 +4220,12 @@ ; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3912,6 +4290,12 @@ ; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3977,6 +4361,12 @@ ; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4042,6 +4432,12 @@ ; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4107,6 +4503,12 @@ ; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4172,6 +4574,12 @@ ; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4237,6 +4645,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4313,6 +4728,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4384,6 +4806,12 @@ ; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4458,6 +4886,12 @@ ; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4535,6 +4969,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4609,6 +5050,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4683,6 +5131,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4758,6 +5213,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4833,6 +5295,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4908,6 +5377,13 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4975,6 +5451,12 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] @@ -5037,6 +5519,12 @@ ; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] @@ -5105,6 +5593,12 @@ ; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -5171,6 +5665,12 @@ ; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5236,6 +5736,12 @@ ; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5301,6 +5807,12 @@ ; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5366,6 +5878,12 @@ ; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5424,6 +5942,11 @@ ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] @@ -5479,6 +6002,12 @@ ; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -5540,6 +6069,12 @@ ; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -5601,6 +6136,12 @@ ; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -5669,6 +6210,12 @@ ; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -5736,6 +6283,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -5809,6 +6363,12 @@ ; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -5878,6 +6438,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -5950,6 +6517,13 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6022,6 +6596,13 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshuflw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6092,6 +6673,13 @@ ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6160,6 +6748,11 @@ ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] @@ -6219,6 +6812,13 @@ ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6291,6 +6891,13 @@ ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6363,6 +6970,13 @@ ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6435,6 +7049,13 @@ ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6507,6 +7128,13 @@ ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6575,6 +7203,11 @@ ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] @@ -6634,6 +7267,13 @@ ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6706,6 +7346,13 @@ ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6777,6 +7424,12 @@ ; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6841,6 +7494,12 @@ ; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6901,6 +7560,12 @@ ; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6965,6 +7630,12 @@ ; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7030,6 +7701,12 @@ ; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7095,6 +7772,12 @@ ; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7160,6 +7843,12 @@ ; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7225,6 +7914,12 @@ ; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -7289,6 +7984,12 @@ ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] @@ -7356,6 +8057,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -7426,6 +8134,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -7495,6 +8210,12 @@ ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] @@ -7559,6 +8280,12 @@ ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] @@ -7626,6 +8353,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -7696,6 +8430,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -7765,6 +8506,12 @@ ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -7830,6 +8577,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -7900,6 +8654,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -7971,6 +8732,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] @@ -8049,6 +8817,14 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] @@ -8119,6 +8895,12 @@ ; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -8179,6 +8961,12 @@ ; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -8279,6 +9067,20 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ucomisd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ucomisd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -8378,6 +9180,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8454,6 +9263,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8524,6 +9340,13 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -41,6 +42,12 @@ ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -102,6 +109,12 @@ ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] @@ -163,6 +176,12 @@ ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] @@ -224,6 +243,12 @@ ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] @@ -285,6 +310,12 @@ ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] @@ -346,6 +377,12 @@ ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] @@ -404,6 +441,11 @@ ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lddqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] @@ -464,6 +506,13 @@ ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_monitor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: monitor # sched: [100:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -533,6 +582,13 @@ ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movddup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] +; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -604,6 +660,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movshdup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] +; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -675,6 +738,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsldup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] +; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -745,6 +815,13 @@ ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mwait: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: mwait # sched: [20:2.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -38,6 +39,13 @@ ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -98,6 +106,12 @@ ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -158,6 +172,12 @@ ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] @@ -219,6 +239,12 @@ ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] @@ -274,6 +300,12 @@ ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dppd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] @@ -329,6 +361,12 @@ ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] @@ -384,6 +422,12 @@ ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_insertps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_insertps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] @@ -435,6 +479,11 @@ ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -484,6 +533,12 @@ ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mpsadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] @@ -540,6 +595,12 @@ ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packusdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -602,6 +663,12 @@ ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendvb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] @@ -657,6 +724,12 @@ ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -711,6 +784,12 @@ ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -769,6 +848,12 @@ ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] @@ -824,6 +909,12 @@ ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] @@ -878,6 +969,12 @@ ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] @@ -932,6 +1029,12 @@ ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] @@ -987,6 +1090,12 @@ ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phminposuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phminposuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] @@ -1042,6 +1151,12 @@ ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1096,6 +1211,12 @@ ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1154,6 +1275,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1213,6 +1341,12 @@ ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1268,6 +1402,12 @@ ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1323,6 +1463,12 @@ ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1378,6 +1524,12 @@ ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1433,6 +1585,12 @@ ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1488,6 +1646,12 @@ ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1543,6 +1707,12 @@ ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1598,6 +1768,12 @@ ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1658,6 +1834,13 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1723,6 +1906,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -1788,6 +1978,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -1853,6 +2050,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -1918,6 +2122,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -1983,6 +2194,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2048,6 +2266,13 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2113,6 +2338,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2178,6 +2410,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2243,6 +2482,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2308,6 +2554,13 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2373,6 +2626,13 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2433,6 +2693,12 @@ ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -2489,6 +2755,12 @@ ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67] @@ -2559,6 +2831,16 @@ ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ptest: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: setb %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ptest: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2636,6 +2918,13 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -2701,6 +2990,13 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -2767,6 +3063,13 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] +; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] @@ -2833,6 +3136,13 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] +; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -38,6 +39,13 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_8: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -101,6 +109,13 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -164,6 +179,13 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -227,6 +249,13 @@ ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_64_8: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -290,6 +319,13 @@ ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_64_64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] +; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -377,6 +413,19 @@ ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpestri: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; BROADWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; BROADWELL-NEXT: # kill: %ECX %ECX %RCX +; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpestri: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -477,6 +526,16 @@ ; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpestrm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpestrm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -560,6 +619,15 @@ ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpistri: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: movl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: # kill: %ECX %ECX %RCX +; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpistri: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] @@ -628,6 +696,12 @@ ; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpistrm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpistrm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] @@ -683,6 +757,12 @@ ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -741,6 +821,12 @@ ; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pclmulqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00] +; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pclmulqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] Index: test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- test/CodeGen/X86/ssse3-schedule.ll +++ test/CodeGen/X86/ssse3-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -47,6 +48,13 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -119,6 +127,13 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -191,6 +206,13 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -262,6 +284,12 @@ ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] @@ -322,6 +350,12 @@ ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -383,6 +417,12 @@ ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -444,6 +484,12 @@ ; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -505,6 +551,12 @@ ; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -566,6 +618,12 @@ ; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -627,6 +685,12 @@ ; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -688,6 +752,12 @@ ; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -750,6 +820,12 @@ ; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] @@ -811,6 +887,12 @@ ; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -876,6 +958,12 @@ ; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -941,6 +1029,12 @@ ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1006,6 +1100,12 @@ ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]