Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -499,6 +499,7 @@ include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" +include "X86ScheduleZnver2.td" include "X86ScheduleBdVer2.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" @@ -1188,7 +1189,7 @@ def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>; def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>; -def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>; +def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>; def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, Feature3DNowA]>; Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -2845,7 +2845,7 @@ //===----------------------------------------------------------------------===// // CLZERO Instruction // -let SchedRW = [WriteSystem] in { +let SchedRW = [WriteLoad] in { let Uses = [EAX] in def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB, Requires<[HasCLZERO, Not64BitMode]>; Index: llvm/test/MC/X86/x86_long_nop.s =================================================================== --- llvm/test/MC/X86/x86_long_nop.s +++ llvm/test/MC/X86/x86_long_nop.s @@ -13,6 +13,8 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=btver2 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver1 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver1 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver2 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # Ensure alignment directives also emit sequences of 10, 11 and 15-byte NOPs on processors # capable of using long NOPs. Index: llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s =================================================================== --- llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s +++ llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s @@ -12,7 +12,7 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 0.33 U clzero +# CHECK-NEXT: 1 5 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -26,8 +26,8 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - +# CHECK-NEXT: - - - - - - 0.50 0.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - clzero +# CHECK-NEXT: - - - - - - 0.50 0.50 clzero Index: llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s =================================================================== --- llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 100 0.25 U clzero +# CHECK-NEXT: 1 8 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 @@ -30,8 +30,8 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - - - - - - - - - - - +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: -# CHECK-NEXT: - - - - - - - - - - - - clzero +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - clzero Index: llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s =================================================================== --- llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -5,6 +5,7 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 add %edi, %esi bextrl %esi, (%rdi), %eax @@ -30,6 +31,9 @@ # ZNVER1-NEXT: Total Cycles: 8 # ZNVER1-NEXT: Total uOps: 3 +# ZNVER2-NEXT: Total Cycles: 8 +# ZNVER2-NEXT: Total uOps: 3 + # BDVER2: Dispatch Width: 4 # BDVER2-NEXT: uOps Per Cycle: 0.33 # BDVER2-NEXT: IPC: 0.22 Index: llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s =================================================================== --- llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -3,6 +3,7 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 add %edi, %esi bzhil %esi, (%rdi), %eax @@ -14,6 +15,7 @@ # HASWELL-NEXT: Total Cycles: 9 # SKYLAKE-NEXT: Total Cycles: 9 # ZNVER1-NEXT: Total Cycles: 8 +# ZNVER2-NEXT: Total Cycles: 8 # ALL-NEXT: Total uOps: 3 Index: llvm/test/tools/llvm-mca/X86/cpus.s =================================================================== --- llvm/test/tools/llvm-mca/X86/cpus.s +++ llvm/test/tools/llvm-mca/X86/cpus.s @@ -3,6 +3,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BDVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s @@ -78,3 +79,8 @@ # ZNVER1-NEXT: uOps Per Cycle: 0.97 # ZNVER1-NEXT: IPC: 0.97 # ZNVER1-NEXT: Block RThroughput: 0.3 + +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.97 +# ZNVER2-NEXT: IPC: 0.97 +# ZNVER2-NEXT: Block RThroughput: 0.3 Index: llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s =================================================================== --- llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -7,6 +7,8 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 + vaddps %xmm0, %xmm0, %xmm1 vfmadd213ps (%rdi), %xmm1, %xmm2 Index: llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s =================================================================== --- llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -7,6 +7,8 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 + vaddps %xmm0, %xmm0, %xmm2 vfmadd213ps (%rdi), %xmm1, %xmm2 Index: llvm/test/tools/llvm-mca/X86/read-after-ld-1.s =================================================================== --- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -7,6 +7,7 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 vdivps %xmm0, %xmm1, %xmm1 vaddps (%rax), %xmm1, %xmm1 @@ -38,6 +39,9 @@ # ZNVER1-NEXT: Total Cycles: 20 # ZNVER1-NEXT: Total uOps: 2 +# ZNVER2-NEXT: Total Cycles: 20 +# ZNVER2-NEXT: Total uOps: 2 + # BARCELONA: Dispatch Width: 4 # BARCELONA-NEXT: uOps Per Cycle: 0.15 # BARCELONA-NEXT: IPC: 0.10 @@ -78,6 +82,11 @@ # ZNVER1-NEXT: IPC: 0.10 # ZNVER1-NEXT: Block RThroughput: 1.0 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.10 +# ZNVER2-NEXT: IPC: 0.10 +# ZNVER2-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BARCELONA-NEXT: 0123456789 @@ -104,6 +113,9 @@ # ZNVER1-NEXT: 0123456789 # ZNVER1-NEXT: Index 0123456789 +# ZNVER2-NEXT: 0123456789 +# ZNVER2-NEXT: Index 0123456789 + # BARCELONA: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 @@ -128,6 +140,9 @@ # ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # ZNVER1-NEXT: [0,1] D=======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# ZNVER2: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# ZNVER2-NEXT: [0,1] D=======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -160,3 +175,6 @@ # ZNVER1-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 # ZNVER1-NEXT: 1 4.5 0.5 0.0 + +# ZNVER2-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# ZNVER2-NEXT: 1 4.5 0.5 0.0 Index: llvm/test/tools/llvm-mca/X86/read-after-ld-2.s =================================================================== --- llvm/test/tools/llvm-mca/X86/read-after-ld-2.s +++ llvm/test/tools/llvm-mca/X86/read-after-ld-2.s @@ -7,6 +7,8 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 + # Code Snippet from "Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks" # Charith Mendis, Saman Amarasinghe, Michael Carbin add $1, %edx @@ -49,6 +51,11 @@ # ZNVER1-NEXT: IPC: 3.64 # ZNVER1-NEXT: Block RThroughput: 1.0 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 3.64 +# ZNVER2-NEXT: IPC: 3.64 +# ZNVER2-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BDWELL-NEXT: 0123456789 @@ -63,6 +70,9 @@ # ZNVER1-NEXT: 0123456789 # ZNVER1-NEXT: Index 0123456789 +# ZNVER2-NEXT: 0123456789 +# ZNVER2-NEXT: Index 0123456789 + # BDWELL: [0,0] DeER . . . .. addl $1, %edx # BDWELL-NEXT: [0,1] DeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0 # BDWELL-NEXT: [0,2] DeE------R. . .. addq $32, %r8 @@ -227,6 +237,47 @@ # ZNVER1-NEXT: [9,2] . . DeE-------R addq $32, %r8 # ZNVER1-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx +# ZNVER2: [0,0] DeER . . . . addl $1, %edx +# ZNVER2-NEXT: [0,1] DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [0,2] DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [0,3] D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [1,0] .DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [1,1] .DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [1,2] .DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [1,3] .D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [2,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [2,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [2,2] . DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [2,3] . D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [3,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [3,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [3,2] . DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [3,3] . D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [4,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [4,1] . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [4,2] . DeE-------R. . addq $32, %r8 +# ZNVER2-NEXT: [4,3] . D=eE------R. . cmpl %edi, %edx +# ZNVER2-NEXT: [5,0] . DeE------R. . addl $1, %edx +# ZNVER2-NEXT: [5,1] . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [5,2] . DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [5,3] . D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [6,0] . .DeE------R . addl $1, %edx +# ZNVER2-NEXT: [6,1] . .DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [6,2] . .DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [6,3] . .D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [7,0] . . DeE------R . addl $1, %edx +# ZNVER2-NEXT: [7,1] . . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [7,2] . . DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [7,3] . . D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [8,0] . . DeE------R . addl $1, %edx +# ZNVER2-NEXT: [8,1] . . DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [8,2] . . DeE-------R. addq $32, %r8 +# ZNVER2-NEXT: [8,3] . . D=eE------R. cmpl %edi, %edx +# ZNVER2-NEXT: [9,0] . . DeE------R. addl $1, %edx +# ZNVER2-NEXT: [9,1] . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [9,2] . . DeE-------R addq $32, %r8 +# ZNVER2-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -239,6 +290,7 @@ # HASWELL-NEXT: 0. 10 1.0 0.4 5.4 addl $1, %edx # SKYLAKE-NEXT: 0. 10 1.9 0.1 5.4 addl $1, %edx # ZNVER1-NEXT: 0. 10 1.0 0.1 5.4 addl $1, %edx +# ZNVER2-NEXT: 0. 10 1.0 0.1 5.4 addl $1, %edx # ALL-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 @@ -257,3 +309,7 @@ # ZNVER1-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8 # ZNVER1-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx # ZNVER1-NEXT: 10 1.3 0.1 4.6 + +# ZNVER2-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8 +# ZNVER2-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx +# ZNVER2-NEXT: 10 1.3 0.1 4.6 Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s =================================================================== --- llvm/test/tools/llvm-mca/X86/register-file-statistics.s +++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s @@ -4,6 +4,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,BDVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s @@ -38,6 +39,11 @@ # ZNVER1-NEXT: Total number of mappings created: 0 # ZNVER1-NEXT: Max number of mappings used: 0 +# ZNVER2: * Register File #1 -- Zn2FpuPRF: +# ZNVER2-NEXT: Number of physical registers: 160 +# ZNVER2-NEXT: Total number of mappings created: 0 +# ZNVER2-NEXT: Max number of mappings used: 0 + # BDVER2: * Register File #2 -- PdIntegerPRF: # BDVER2-NEXT: Number of physical registers: 96 # BDVER2-NEXT: Total number of mappings created: 2 @@ -52,3 +58,8 @@ # ZNVER1-NEXT: Number of physical registers: 168 # ZNVER1-NEXT: Total number of mappings created: 2 # ZNVER1-NEXT: Max number of mappings used: 2 + +# ZNVER2: * Register File #2 -- Zn2IntegerPRF: +# ZNVER2-NEXT: Number of physical registers: 168 +# ZNVER2-NEXT: Total number of mappings created: 2 +# ZNVER2-NEXT: Max number of mappings used: 2 Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s =================================================================== --- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -3,6 +3,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SNB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s @@ -88,6 +89,12 @@ # ZNVER1-NEXT: [3] Maximum number of used buffer entries. # ZNVER1-NEXT: [4] Total number of buffer entries. +# ZNVER2: Scheduler's queue usage: +# ZNVER2-NEXT: [1] Resource name. +# ZNVER2-NEXT: [2] Average number of used buffer entries. +# ZNVER2-NEXT: [3] Maximum number of used buffer entries. +# ZNVER2-NEXT: [4] Total number of buffer entries. + # BARCELONA: [1] [2] [3] [4] # BARCELONA-NEXT: SBPortAny 0 1 54 Index: llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s =================================================================== --- llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -3,6 +3,7 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BROADWELL # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE @@ -52,6 +53,9 @@ # ZNVER1-NEXT: 0123456789 0 # ZNVER1-NEXT: Index 0123456789 0123456789 +# ZNVER2-NEXT: 0123456789 0 +# ZNVER2-NEXT: Index 0123456789 0123456789 + # BARCELONA: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 @@ -73,6 +77,9 @@ # ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax # ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 +# ZNVER2: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -103,6 +110,9 @@ # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 # ZNVER1-NEXT: 1 1.5 0.5 0.0 +# ZNVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# ZNVER2-NEXT: 1 1.5 0.5 0.0 + # ALL: [1] Code Region - test_sqrtsd # ALL: Timeline view: @@ -128,6 +138,9 @@ # ZNVER1-NEXT: 0123456789 0 # ZNVER1-NEXT: Index 0123456789 0123456789 +# ZNVER2-NEXT: 0123456789 0 +# ZNVER2-NEXT: Index 0123456789 0123456789 + # BARCELONA: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 @@ -149,6 +162,9 @@ # ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax # ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 +# ZNVER2: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -179,6 +195,9 @@ # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 # ZNVER1-NEXT: 1 1.5 0.5 0.0 +# ZNVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# ZNVER2-NEXT: 1 1.5 0.5 0.0 + # ALL: [2] Code Region - test_rsqrtss # ALL: Timeline view: @@ -190,6 +209,7 @@ # HASWELL-NEXT: 0123 # SKYLAKE-NEXT: 012 # ZNVER1-NEXT: 012345 +# ZNVER2-NEXT: 012345 # ALL-NEXT: Index 0123456789 @@ -214,6 +234,9 @@ # ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rsqrtss (%rax), %xmm1 +# ZNVER2: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeER rsqrtss (%rax), %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -244,6 +267,9 @@ # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 # ZNVER1-NEXT: 1 1.5 0.5 0.0 +# ZNVER2-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# ZNVER2-NEXT: 1 1.5 0.5 0.0 + # ALL: [3] Code Region - test_rcp # ALL: Timeline view: @@ -255,6 +281,7 @@ # HASWELL-NEXT: 0123 # SKYLAKE-NEXT: 012 # ZNVER1-NEXT: 012345 +# ZNVER2-NEXT: 012345 # ALL-NEXT: Index 0123456789 @@ -279,6 +306,9 @@ # ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rcpss (%rax), %xmm1 +# ZNVER2: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeER rcpss (%rax), %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -308,3 +338,6 @@ # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 # ZNVER1-NEXT: 1 1.5 0.5 0.0 + +# ZNVER2-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# ZNVER2-NEXT: 1 1.5 0.5 0.0 Index: llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s =================================================================== --- llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -15,6 +15,8 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 + vaddps %xmm0, %xmm0, %xmm1 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -45,6 +47,9 @@ # ZNVER1-NEXT: Total Cycles: 11 # ZNVER1-NEXT: Total uOps: 2 +# ZNVER2-NEXT: Total Cycles: 11 +# ZNVER2-NEXT: Total uOps: 2 + # BDVER2: Dispatch Width: 4 # BDVER2-NEXT: uOps Per Cycle: 0.20 # BDVER2-NEXT: IPC: 0.20 @@ -85,6 +90,11 @@ # ZNVER1-NEXT: IPC: 0.18 # ZNVER1-NEXT: Block RThroughput: 1.0 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.18 +# ZNVER2-NEXT: IPC: 0.18 +# ZNVER2-NEXT: Block RThroughput: 1.0 + # BDVER2: Timeline view: # BDVER2-NEXT: Index 0123456789 @@ -115,6 +125,10 @@ # ZNVER1-NEXT: 0 # ZNVER1-NEXT: Index 0123456789 +# ZNVER2: Timeline view: +# ZNVER2-NEXT: 0 +# ZNVER2-NEXT: Index 0123456789 + # BDVER2: [0,0] DeeeeeER . vaddps %xmm0, %xmm0, %xmm1 # BDVER2-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -139,6 +153,9 @@ # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# ZNVER2-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -171,3 +188,6 @@ # ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: 1 1.0 0.5 0.0 + +# ZNVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER2-NEXT: 1 1.0 0.5 0.0 Index: llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s =================================================================== --- llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -15,6 +15,8 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 + vaddps %xmm0, %xmm0, %xmm2 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -45,6 +47,9 @@ # ZNVER1-NEXT: Total Cycles: 11 # ZNVER1-NEXT: Total uOps: 2 +# ZNVER2-NEXT: Total Cycles: 11 +# ZNVER2-NEXT: Total uOps: 2 + # BDVER2: Dispatch Width: 4 # BDVER2-NEXT: uOps Per Cycle: 0.20 # BDVER2-NEXT: IPC: 0.20 @@ -85,6 +90,11 @@ # ZNVER1-NEXT: IPC: 0.18 # ZNVER1-NEXT: Block RThroughput: 1.0 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.18 +# ZNVER2-NEXT: IPC: 0.18 +# ZNVER2-NEXT: Block RThroughput: 1.0 + # BDVER2: Timeline view: # BDVER2-NEXT: Index 0123456789 @@ -115,6 +125,10 @@ # ZNVER1-NEXT: 0 # ZNVER1-NEXT: Index 0123456789 +# ZNVER2: Timeline view: +# ZNVER2-NEXT: 0 +# ZNVER2-NEXT: Index 0123456789 + # BDVER2: [0,0] DeeeeeER . vaddps %xmm0, %xmm0, %xmm2 # BDVER2-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -139,6 +153,9 @@ # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# ZNVER2-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -171,3 +188,7 @@ # ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: 1 1.0 0.5 0.0 + +# ZNVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER2-NEXT: 1 1.0 0.5 0.0 +