Index: lib/Target/X86/X86PfmCounters.td =================================================================== --- lib/Target/X86/X86PfmCounters.td +++ lib/Target/X86/X86PfmCounters.td @@ -20,6 +20,7 @@ "uops_dispatched_port:port_3"]>; def SBPort4Counter : PfmIssueCounter; def SBPort5Counter : PfmIssueCounter; +def SBUopsCounter : PfmUopsCounter<"uops_issued:any">; } let SchedModel = HaswellModel in { @@ -45,6 +46,7 @@ def BWPort5Counter : PfmIssueCounter; def BWPort6Counter : PfmIssueCounter; def BWPort7Counter : PfmIssueCounter; +def BWUopsCounter : PfmUopsCounter<"uops_issued:any">; } let SchedModel = SkylakeClientModel in { @@ -69,6 +71,7 @@ def SKXPort5Counter : PfmIssueCounter; def SKXPort6Counter : PfmIssueCounter; def SKXPort7Counter : PfmIssueCounter; +def SKXUopsCounter : PfmUopsCounter<"uops_issued:any">; } let SchedModel = BtVer2Model in { Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -1374,10 +1374,10 @@ } def: InstRW<[BWWriteResGroup153], (instrs CMPXCHG8B)>; -def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> { - let Latency = 16; - let NumMicroOps = 16; - let ResourceCycles = [16]; +def BWWriteResGroup154 : SchedWriteRes<[BWPort5,BWPort6]> { + let Latency = 8; + let NumMicroOps = 20; + let ResourceCycles = [1,1]; } def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -1617,10 +1617,10 @@ } def: InstRW<[HWWriteResGroup144], (instrs INSB, INSL, INSW)>; -def HWWriteResGroup145 : SchedWriteRes<[HWPort5]> { - let Latency = 16; - let NumMicroOps = 16; - let ResourceCycles = [16]; +def HWWriteResGroup145 : SchedWriteRes<[HWPort5, HWPort6]> { + let Latency = 8; + let NumMicroOps = 20; + let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup145], (instrs VZEROALL)>; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -1104,6 +1104,13 @@ } def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>; +def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> { + let Latency = 9; + let NumMicroOps = 20; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>; + def: InstRW<[WriteZero], (instrs CLC)>; // Intruction variants handled by the renamer. These might not need execution Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -2086,10 +2086,10 @@ } def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>; -def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> { - let Latency = 16; - let NumMicroOps = 16; - let ResourceCycles = [16]; +def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> { + let Latency = 12; + let NumMicroOps = 34; + let ResourceCycles = [1, 4, 5]; } def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>; Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -5339,22 +5339,22 @@ define void @test_zeroall() { ; GENERIC-LABEL: test_zeroall: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vzeroall # sched: [100:0.33] +; GENERIC-NEXT: vzeroall # sched: [9:2.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_zeroall: ; SANDY: # %bb.0: -; SANDY-NEXT: vzeroall # sched: [100:0.33] +; SANDY-NEXT: vzeroall # sched: [9:2.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vzeroall # sched: [16:16.00] +; HASWELL-NEXT: vzeroall # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_zeroall: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vzeroall # sched: [16:16.00] +; BROADWELL-NEXT: vzeroall # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_zeroall: @@ -5364,7 +5364,7 @@ ; ; SKX-LABEL: test_zeroall: ; SKX: # %bb.0: -; SKX-NEXT: vzeroall # sched: [16:4.00] +; SKX-NEXT: vzeroall # sched: [12:5.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_zeroall: Index: test/tools/llvm-mca/X86/Broadwell/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1719,7 +1719,7 @@ # CHECK-NEXT: 2 6 1.00 * vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 16 16 16.00 * * U vzeroall +# CHECK-NEXT: 20 8 1.00 * * U vzeroall # CHECK-NEXT: 4 4 1.00 * * U vzeroupper # CHECK: Resources: @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.33 236.33 176.17 176.17 38.00 442.33 2.00 12.67 +# CHECK-NEXT: - 257.00 216.33 236.33 176.17 176.17 38.00 427.33 3.00 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2429,5 +2429,5 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 16.00 - - vzeroall +# CHECK-NEXT: - - - - - - - 1.00 1.00 - vzeroall # CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper Index: test/tools/llvm-mca/X86/Generic/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Generic/resources-avx1.s +++ test/tools/llvm-mca/X86/Generic/resources-avx1.s @@ -1719,7 +1719,7 @@ # CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.33 * * U vzeroall +# CHECK-NEXT: 20 9 2.00 * * U vzeroall # CHECK-NEXT: 1 100 0.33 * * U vzeroupper # CHECK: Resources: @@ -1734,7 +1734,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 247.17 317.67 39.00 364.17 179.50 179.50 +# CHECK-NEXT: - 572.00 246.83 317.33 39.00 365.83 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2427,5 +2427,5 @@ # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroall +# CHECK-NEXT: - - - - - 2.00 - - vzeroall # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroupper Index: test/tools/llvm-mca/X86/Haswell/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1719,7 +1719,7 @@ # CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 16 16 16.00 * * U vzeroall +# CHECK-NEXT: 20 8 1.00 * * U vzeroall # CHECK-NEXT: 4 4 1.00 * * U vzeroupper # CHECK: Resources: @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.67 237.67 176.17 176.17 38.00 445.67 2.00 12.67 +# CHECK-NEXT: - 336.00 215.67 237.67 176.17 176.17 38.00 430.67 3.00 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2429,5 +2429,5 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 16.00 - - vzeroall +# CHECK-NEXT: - - - - - - - 1.00 1.00 - vzeroall # CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper Index: test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1719,7 +1719,7 @@ # CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.33 * * U vzeroall +# CHECK-NEXT: 20 9 2.00 * * U vzeroall # CHECK-NEXT: 1 100 0.33 * * U vzeroupper # CHECK: Resources: @@ -1734,7 +1734,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 247.17 317.67 39.00 364.17 179.50 179.50 +# CHECK-NEXT: - 572.00 246.83 317.33 39.00 365.83 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2427,5 +2427,5 @@ # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroall +# CHECK-NEXT: - - - - - 2.00 - - vzeroall # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroupper Index: test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1719,7 +1719,7 @@ # CHECK-NEXT: 2 7 0.50 * vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 16 16 4.00 * * U vzeroall +# CHECK-NEXT: 34 12 5.00 * * U vzeroall # CHECK-NEXT: 4 4 1.00 * * U vzeroupper # CHECK: Resources: @@ -1736,7 +1736,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 123.00 320.50 200.50 173.17 173.17 34.00 341.00 6.00 12.67 +# CHECK-NEXT: - 123.00 318.50 197.50 173.17 173.17 34.00 339.00 7.00 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2429,5 +2429,5 @@ # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vxorps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 4.00 4.00 - - - 4.00 4.00 - vzeroall +# CHECK-NEXT: - - 2.00 1.00 - - - 2.00 5.00 - vzeroall # CHECK-NEXT: - - 1.08 1.08 - - - 1.08 0.75 - vzeroupper