Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -878,10 +878,10 @@ } def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>; -def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> { - let Latency = 4; +def BWWriteResGroup46 : SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let ResourceCycles = []; } def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -1408,10 +1408,10 @@ } def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>; -def HWWriteResGroup82 : SchedWriteRes<[HWPort015,HWPort0156]> { - let Latency = 4; +def HWWriteResGroup82 : SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let ResourceCycles = []; } def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -1112,6 +1112,13 @@ } def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>; +def SBWriteResGroupVzeroupper : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 4; + let ResourceCycles = []; +} +def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>; + def: InstRW<[WriteZero], (instrs CLC)>; // Intruction variants handled by the renamer. These might not need execution Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -897,10 +897,10 @@ } def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>; -def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> { - let Latency = 4; +def SKLWriteResGroup56 : SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let ResourceCycles = []; } def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>; Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -1009,10 +1009,10 @@ } def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>; -def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> { - let Latency = 4; +def SKXWriteResGroup56 : SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let ResourceCycles = []; } def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>; Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -1843,42 +1843,42 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_extractf128: ; SANDY: # %bb.0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_extractf128: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_extractf128: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_extractf128: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_extractf128: ; SKX: # %bb.0: ; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_extractf128: @@ -3008,37 +3008,37 @@ ; GENERIC-LABEL: test_movmskpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movmskpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_movmskpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_movmskpd: @@ -3066,37 +3066,37 @@ ; GENERIC-LABEL: test_movmskps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movmskps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_movmskps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskps: ; SKX: # %bb.0: ; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_movmskps: @@ -3126,7 +3126,7 @@ ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movntdq: @@ -3134,7 +3134,7 @@ ; SANDY-NEXT: #APP ; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntdq: @@ -3142,7 +3142,7 @@ ; HASWELL-NEXT: #APP ; HASWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_movntdq: @@ -3150,7 +3150,7 @@ ; BROADWELL-NEXT: #APP ; BROADWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntdq: @@ -3158,7 +3158,7 @@ ; SKYLAKE-NEXT: #APP ; SKYLAKE-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntdq: @@ -3166,7 +3166,7 @@ ; SKX-NEXT: #APP ; SKX-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_movntdq: @@ -5240,7 +5240,7 @@ ; GENERIC-NEXT: setb %al # sched: [1:0.50] ; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_testpd_ymm: @@ -5250,7 +5250,7 @@ ; SANDY-NEXT: setb %al # sched: [1:0.50] ; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testpd_ymm: @@ -5260,7 +5260,7 @@ ; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_testpd_ymm: @@ -5270,7 +5270,7 @@ ; BROADWELL-NEXT: setb %al # sched: [1:0.50] ; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testpd_ymm: @@ -5280,7 +5280,7 @@ ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] ; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testpd_ymm: @@ -5290,7 +5290,7 @@ ; SKX-NEXT: setb %al # sched: [1:0.50] ; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_testpd_ymm: @@ -5426,7 +5426,7 @@ ; GENERIC-NEXT: setb %al # sched: [1:0.50] ; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_testps_ymm: @@ -5436,7 +5436,7 @@ ; SANDY-NEXT: setb %al # sched: [1:0.50] ; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testps_ymm: @@ -5446,7 +5446,7 @@ ; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_testps_ymm: @@ -5456,7 +5456,7 @@ ; BROADWELL-NEXT: setb %al # sched: [1:0.50] ; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testps_ymm: @@ -5466,7 +5466,7 @@ ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] ; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testps_ymm: @@ -5476,7 +5476,7 @@ ; SKX-NEXT: setb %al # sched: [1:0.50] ; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_testps_ymm: @@ -5976,32 +5976,32 @@ define void @test_zeroupper() { ; GENERIC-LABEL: test_zeroupper: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_zeroupper: ; SANDY: # %bb.0: -; SANDY-NEXT: vzeroupper # sched: [100:0.33] +; SANDY-NEXT: vzeroupper # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_zeroupper: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_zeroupper: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_zeroupper: ; SKX: # %bb.0: -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_zeroupper: Index: test/CodeGen/X86/avx2-schedule.ll =================================================================== --- test/CodeGen/X86/avx2-schedule.ll +++ test/CodeGen/X86/avx2-schedule.ll @@ -177,7 +177,7 @@ ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] ; GENERIC-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_extracti128: @@ -185,7 +185,7 @@ ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] ; HASWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_extracti128: @@ -193,7 +193,7 @@ ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] ; BROADWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_extracti128: @@ -201,7 +201,7 @@ ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] ; SKYLAKE-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_extracti128: @@ -209,7 +209,7 @@ ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] ; SKX-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_extracti128: @@ -474,31 +474,31 @@ ; GENERIC-LABEL: test_gatherqps_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherqps_ymm: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_gatherqps_ymm: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherqps_ymm: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherqps_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherqps_ymm: @@ -2920,31 +2920,31 @@ ; GENERIC-LABEL: test_pgatherqd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherqd_ymm: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_pgatherqd_ymm: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherqd_ymm: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherqd_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherqd_ymm: @@ -4089,31 +4089,31 @@ ; GENERIC-LABEL: test_pmovmskb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_pmovmskb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovmskb: ; SKX: # %bb.0: ; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovmskb: Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -1447,13 +1447,13 @@ ; GENERIC-LABEL: slto4f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: slto4f32: ; SKX: # %bb.0: ; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <4 x i64> %a to <4 x float> ret <4 x float> %b @@ -1463,13 +1463,13 @@ ; GENERIC-LABEL: ulto4f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ulto4f32: ; SKX: # %bb.0: ; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <4 x i64> %a to <4 x float> ret <4 x float> %b @@ -1538,14 +1538,14 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to16uc: ; SKX: # %bb.0: ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i8> ret <16 x i8> %res @@ -1614,14 +1614,14 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to8us: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <8 x double> %f to <8 x i16> ret <8 x i16> %res @@ -1632,14 +1632,14 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to8uc: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <8 x double> %f to <8 x i8> ret <8 x i8> %res @@ -1649,13 +1649,13 @@ ; GENERIC-LABEL: f64to4ui: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to4ui: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <4 x double> %a to <4 x i32> ret <4 x i32> %b @@ -1737,13 +1737,13 @@ ; GENERIC-LABEL: f64to4si: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to4si: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi <4 x double> %a to <4 x i32> ret <4 x i32> %b @@ -1771,13 +1771,13 @@ ; GENERIC-LABEL: f64to4f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to4f32: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %a = fptrunc <4 x double> %b to <4 x float> ret <4 x float> %a @@ -1789,7 +1789,7 @@ ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64to4f32_mask: @@ -1797,7 +1797,7 @@ ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] ; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %a = fptrunc <4 x double> %b to <4 x float> %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer @@ -4307,7 +4307,7 @@ ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: trunc_16i32_to_16i1: @@ -4316,7 +4316,7 @@ ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %mask_b = trunc <16 x i32>%a to <16 x i1> %mask = bitcast <16 x i1> %mask_b to i16 @@ -4422,14 +4422,14 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8i1_8i16: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %x = icmp slt <8 x i32> %a1, %a2 %y = sext <8 x i1> %x to <8 x i16> @@ -4475,14 +4475,14 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: extload_v8i64: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] ; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %sign_load = load <8 x i8>, <8 x i8>* %a %c = sext <8 x i8> %sign_load to <8 x i64> @@ -6124,13 +6124,13 @@ ; GENERIC-LABEL: mov_test18: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test18: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 64 @@ -6141,13 +6141,13 @@ ; GENERIC-LABEL: mov_test19: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test19: ; SKX: # %bb.0: ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 1 @@ -6158,13 +6158,13 @@ ; GENERIC-LABEL: mov_test20: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test20: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 64 @@ -6190,13 +6190,13 @@ ; GENERIC-LABEL: mov_test22: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test22: ; SKX: # %bb.0: ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 1 @@ -6222,13 +6222,13 @@ ; GENERIC-LABEL: mov_test24: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test24: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 64 @@ -6254,13 +6254,13 @@ ; GENERIC-LABEL: mov_test26: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test26: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 64 @@ -6286,13 +6286,13 @@ ; GENERIC-LABEL: mov_test28: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test28: ; SKX: # %bb.0: ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 1 @@ -6318,13 +6318,13 @@ ; GENERIC-LABEL: mov_test30: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test30: ; SKX: # %bb.0: ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 1 @@ -6877,7 +6877,7 @@ ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_test1: @@ -6886,7 +6886,7 @@ ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 @@ -6902,7 +6902,7 @@ ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_test2: @@ -6912,7 +6912,7 @@ ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 @@ -6928,7 +6928,7 @@ ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_test3: @@ -6938,7 +6938,7 @@ ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: andb $1, %al # sched: [1:0.25] ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 @@ -6976,7 +6976,7 @@ ; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50] ; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test4: @@ -6984,7 +6984,7 @@ ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00] ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00] ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %x_gt_y = icmp sgt <4 x i64> %x, %y %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 @@ -7061,13 +7061,13 @@ ; GENERIC-NEXT: # %bb.2: ; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB386_1: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] ; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test8: @@ -7077,13 +7077,13 @@ ; SKX-NEXT: # %bb.2: ; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB386_1: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %cond = icmp sgt i32 %a1, %b1 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer @@ -7617,11 +7617,11 @@ ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB410_2: # %L2 ; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ktest_1: @@ -7634,11 +7634,11 @@ ; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %L1 ; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB410_2: # %L2 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %addr1 = getelementptr double, double * %base, i64 0 %addr2 = getelementptr double, double * %base, i64 1 @@ -7686,12 +7686,12 @@ ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB411_2: # %L2 ; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ktest_2: @@ -7711,12 +7711,12 @@ ; SKX-NEXT: # %bb.1: # %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB411_2: # %L2 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %addr1 = getelementptr float, float * %base, i64 0 %addr2 = getelementptr float, float * %base, i64 1 @@ -7908,7 +7908,7 @@ ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: store_32i1: @@ -7916,7 +7916,7 @@ ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] store <32 x i1> %v, <32 x i1>* %a ret void @@ -7928,7 +7928,7 @@ ; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: store_32i1_1: @@ -7936,7 +7936,7 @@ ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %v1 = trunc <32 x i16> %v to <32 x i1> store <32 x i1> %v1, <32 x i1>* %a @@ -7951,7 +7951,7 @@ ; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: store_64i1: @@ -7959,7 +7959,7 @@ ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] store <64 x i1> %v, <64 x i1>* %a ret void @@ -7971,7 +7971,7 @@ ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_bitcast_v8i1_zext: @@ -7979,7 +7979,7 @@ ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00] ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> @@ -7995,7 +7995,7 @@ ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_bitcast_v16i1_zext: @@ -8003,7 +8003,7 @@ ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask1 = bitcast <16 x i1> %v1 to i16 Index: test/CodeGen/X86/avx512vpopcntdq-schedule.ll =================================================================== --- test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -18,7 +18,7 @@ ; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50] ; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ICELAKE-LABEL: test_vpopcntd: @@ -35,7 +35,7 @@ ; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00] ; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP -; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] +; ICELAKE-NEXT: vzeroupper # sched: [0:0.67] ; ICELAKE-NEXT: retq # sched: [7:1.00] tail call void asm "vpopcntd $1, $0 \0A\09 vpopcntd $1, $0 {$3} \0A\09 vpopcntd $1, $0 {$3} {z} \0A\09 vpopcntd $2, $0 \0A\09 vpopcntd $2, $0 {$3} \0A\09 vpopcntd $2, $0 {$3} {z} \0A\09 vpopcntd $2{1to16}, $0 \0A\09 vpopcntd $2{1to16}, $0 {$3} \0A\09 vpopcntd $2{1to16}, $0 {$3} {z}", "v,v,*m,^Yk"(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) nounwind ret void @@ -56,7 +56,7 @@ ; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50] ; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ICELAKE-LABEL: test_vpopcntq: @@ -73,7 +73,7 @@ ; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00] ; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP -; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] +; ICELAKE-NEXT: vzeroupper # sched: [0:0.67] ; ICELAKE-NEXT: retq # sched: [7:1.00] tail call void asm "vpopcntq $1, $0 \0A\09 vpopcntq $1, $0 {$3} \0A\09 vpopcntq $1, $0 {$3} {z} \0A\09 vpopcntq $2, $0 \0A\09 vpopcntq $2, $0 {$3} \0A\09 vpopcntq $2, $0 {$3} {z} \0A\09 vpopcntq $2{1to8}, $0 \0A\09 vpopcntq $2{1to8}, $0 {$3} \0A\09 vpopcntq $2{1to8}, $0 {$3} {z}", "v,v,*m,^Yk"(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) nounwind ret void Index: test/CodeGen/X86/f16c-schedule.ll =================================================================== --- test/CodeGen/X86/f16c-schedule.ll +++ test/CodeGen/X86/f16c-schedule.ll @@ -197,35 +197,35 @@ ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; IVY-LABEL: test_vcvtps2ph_256: ; IVY: # %bb.0: ; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] ; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] -; IVY-NEXT: vzeroupper # sched: [100:0.33] +; IVY-NEXT: vzeroupper # sched: [1:1.00] ; IVY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_vcvtps2ph_256: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vcvtps2ph_256: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vcvtps2ph_256: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BDVER2-LABEL: test_vcvtps2ph_256: Index: test/CodeGen/X86/fma-schedule.ll =================================================================== --- test/CodeGen/X86/fma-schedule.ll +++ test/CodeGen/X86/fma-schedule.ll @@ -123,7 +123,7 @@ ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmaddpd_256: @@ -149,7 +149,7 @@ ; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] ; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmaddpd_256: @@ -162,7 +162,7 @@ ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddpd_256: @@ -175,7 +175,7 @@ ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddpd_256: @@ -200,7 +200,7 @@ ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddpd_256: @@ -330,7 +330,7 @@ ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmaddps_256: @@ -356,7 +356,7 @@ ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] ; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmaddps_256: @@ -369,7 +369,7 @@ ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddps_256: @@ -382,7 +382,7 @@ ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddps_256: @@ -407,7 +407,7 @@ ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddps_256: @@ -741,7 +741,7 @@ ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmaddsubpd_256: @@ -767,7 +767,7 @@ ; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] ; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmaddsubpd_256: @@ -780,7 +780,7 @@ ; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubpd_256: @@ -793,7 +793,7 @@ ; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubpd_256: @@ -818,7 +818,7 @@ ; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubpd_256: @@ -948,7 +948,7 @@ ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmaddsubps_256: @@ -974,7 +974,7 @@ ; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] ; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmaddsubps_256: @@ -987,7 +987,7 @@ ; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubps_256: @@ -1000,7 +1000,7 @@ ; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubps_256: @@ -1025,7 +1025,7 @@ ; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubps_256: @@ -1159,7 +1159,7 @@ ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmsubaddpd_256: @@ -1185,7 +1185,7 @@ ; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] ; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmsubaddpd_256: @@ -1198,7 +1198,7 @@ ; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddpd_256: @@ -1211,7 +1211,7 @@ ; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddpd_256: @@ -1236,7 +1236,7 @@ ; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddpd_256: @@ -1366,7 +1366,7 @@ ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmsubaddps_256: @@ -1392,7 +1392,7 @@ ; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] ; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmsubaddps_256: @@ -1405,7 +1405,7 @@ ; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddps_256: @@ -1418,7 +1418,7 @@ ; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddps_256: @@ -1443,7 +1443,7 @@ ; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddps_256: @@ -1577,7 +1577,7 @@ ; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] ; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmsubpd_256: @@ -1603,7 +1603,7 @@ ; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] ; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmsubpd_256: @@ -1616,7 +1616,7 @@ ; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubpd_256: @@ -1629,7 +1629,7 @@ ; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubpd_256: @@ -1654,7 +1654,7 @@ ; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubpd_256: @@ -1784,7 +1784,7 @@ ; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] ; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfmsubps_256: @@ -1810,7 +1810,7 @@ ; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] ; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfmsubps_256: @@ -1823,7 +1823,7 @@ ; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubps_256: @@ -1836,7 +1836,7 @@ ; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubps_256: @@ -1861,7 +1861,7 @@ ; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubps_256: @@ -2195,7 +2195,7 @@ ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfnmaddpd_256: @@ -2221,7 +2221,7 @@ ; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] ; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfnmaddpd_256: @@ -2234,7 +2234,7 @@ ; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmaddpd_256: @@ -2247,7 +2247,7 @@ ; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmaddpd_256: @@ -2272,7 +2272,7 @@ ; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmaddpd_256: @@ -2402,7 +2402,7 @@ ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfnmaddps_256: @@ -2428,7 +2428,7 @@ ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] ; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfnmaddps_256: @@ -2441,7 +2441,7 @@ ; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmaddps_256: @@ -2454,7 +2454,7 @@ ; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmaddps_256: @@ -2479,7 +2479,7 @@ ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmaddps_256: @@ -2813,7 +2813,7 @@ ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfnmsubpd_256: @@ -2839,7 +2839,7 @@ ; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] ; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfnmsubpd_256: @@ -2852,7 +2852,7 @@ ; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsubpd_256: @@ -2865,7 +2865,7 @@ ; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsubpd_256: @@ -2890,7 +2890,7 @@ ; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsubpd_256: @@ -3020,7 +3020,7 @@ ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER2-LABEL: test_vfnmsubps_256: @@ -3046,7 +3046,7 @@ ; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] ; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] ; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [0:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_vfnmsubps_256: @@ -3059,7 +3059,7 @@ ; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsubps_256: @@ -3072,7 +3072,7 @@ ; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] +; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsubps_256: @@ -3097,7 +3097,7 @@ ; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] ; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: vzeroupper # sched: [0:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsubps_256: Index: test/CodeGen/X86/fma4-schedule.ll =================================================================== --- test/CodeGen/X86/fma4-schedule.ll +++ test/CodeGen/X86/fma4-schedule.ll @@ -48,7 +48,7 @@ ; GENERIC-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmaddpd_256: @@ -113,7 +113,7 @@ ; GENERIC-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmaddps_256: @@ -244,7 +244,7 @@ ; GENERIC-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmaddsubpd_256: @@ -309,7 +309,7 @@ ; GENERIC-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmaddsubps_256: @@ -378,7 +378,7 @@ ; GENERIC-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmsubaddpd_256: @@ -443,7 +443,7 @@ ; GENERIC-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmsubaddps_256: @@ -512,7 +512,7 @@ ; GENERIC-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmsubpd_256: @@ -577,7 +577,7 @@ ; GENERIC-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfmsubps_256: @@ -708,7 +708,7 @@ ; GENERIC-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfnmaddpd_256: @@ -773,7 +773,7 @@ ; GENERIC-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfnmaddps_256: @@ -904,7 +904,7 @@ ; GENERIC-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfnmsubpd_256: @@ -969,7 +969,7 @@ ; GENERIC-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfnmsubps_256: Index: test/CodeGen/X86/xop-schedule.ll =================================================================== --- test/CodeGen/X86/xop-schedule.ll +++ test/CodeGen/X86/xop-schedule.ll @@ -14,7 +14,7 @@ ; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfrczpd: @@ -62,7 +62,7 @@ ; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vfrczps: @@ -221,7 +221,7 @@ ; GENERIC-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vpcmov_256: @@ -425,7 +425,7 @@ ; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vpermil2pd_256: @@ -509,7 +509,7 @@ ; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] +; GENERIC-NEXT: vzeroupper # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER12-LABEL: test_vpermil2ps_256: Index: test/tools/llvm-mca/X86/Broadwell/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 20 8 1.00 * * U vzeroall -# CHECK-NEXT: 4 4 1.00 * * U vzeroupper +# CHECK-NEXT: 4 0 1.00 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - BWDivider @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.33 236.33 176.17 176.17 38.00 427.33 3.00 12.67 +# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 426.25 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2430,4 +2430,4 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 1.00 - vzeroall -# CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper +# CHECK-NEXT: - - - - - - - - - - vzeroupper Index: test/tools/llvm-mca/X86/Generic/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Generic/resources-avx1.s +++ test/tools/llvm-mca/X86/Generic/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 20 9 2.00 * * U vzeroall -# CHECK-NEXT: 1 100 0.33 * * U vzeroupper +# CHECK-NEXT: 4 1 1.00 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -1734,7 +1734,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 246.83 317.33 39.00 365.83 179.50 179.50 +# CHECK-NEXT: - 572.00 246.50 317.00 39.00 365.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2428,4 +2428,4 @@ # CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 2.00 - - vzeroall -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroupper +# CHECK-NEXT: - - - - - - - - vzeroupper Index: test/tools/llvm-mca/X86/Haswell/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 20 8 1.00 * * U vzeroall -# CHECK-NEXT: 4 4 1.00 * * U vzeroupper +# CHECK-NEXT: 4 0 1.00 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - HWDivider @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.67 237.67 176.17 176.17 38.00 430.67 3.00 12.67 +# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 429.58 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2430,4 +2430,4 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 1.00 - vzeroall -# CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper +# CHECK-NEXT: - - - - - - - - - - vzeroupper Index: test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 20 9 2.00 * * U vzeroall -# CHECK-NEXT: 1 100 0.33 * * U vzeroupper +# CHECK-NEXT: 4 1 1.00 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -1734,7 +1734,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 246.83 317.33 39.00 365.83 179.50 179.50 +# CHECK-NEXT: - 572.00 246.50 317.00 39.00 365.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2428,4 +2428,4 @@ # CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 2.00 - - vzeroall -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroupper +# CHECK-NEXT: - - - - - - - - vzeroupper Index: test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 0.33 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 16 16 4.00 * * U vzeroall -# CHECK-NEXT: 4 4 1.00 * * U vzeroupper +# CHECK-NEXT: 4 0 0.67 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - SKLDivider @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 123.00 333.17 204.17 173.17 173.17 34.00 324.67 6.00 12.67 +# CHECK-NEXT: - 123.00 332.08 203.08 173.17 173.17 34.00 323.58 5.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2430,4 +2430,4 @@ # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 4.00 4.00 - - - 4.00 4.00 - vzeroall -# CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper +# CHECK-NEXT: - - - - - - - - - - vzeroupper Index: test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1720,7 +1720,7 @@ # CHECK-NEXT: 1 1 0.33 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 34 12 5.00 * * U vzeroall -# CHECK-NEXT: 4 4 1.00 * * U vzeroupper +# CHECK-NEXT: 4 0 0.67 * * U vzeroupper # CHECK: Resources: # CHECK-NEXT: [0] - SKXDivider @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 123.00 318.50 197.50 173.17 173.17 34.00 339.00 7.00 12.67 +# CHECK-NEXT: - 123.00 317.42 196.42 173.17 173.17 34.00 337.92 6.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2430,4 +2430,4 @@ # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 2.00 1.00 - - - 2.00 5.00 - vzeroall -# CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper +# CHECK-NEXT: - - - - - - - - - - vzeroupper