Index: test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s =================================================================== --- test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s +++ test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s @@ -1,6 +1,7 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=cortex-a57 -iterations=600 -timeline < %s | FileCheck %s - b t + b t # CHECK: Iterations: 600 # CHECK-NEXT: Instructions: 600 @@ -8,7 +9,6 @@ # CHECK-NEXT: Dispatch Width: 3 # CHECK-NEXT: IPC: 1.00 - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -18,8 +18,7 @@ # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 1.00 b t - +# CHECK-NEXT: 1 1 1.00 b t # CHECK: Resources: # CHECK-NEXT: [0] - A57UnitB @@ -31,25 +30,37 @@ # CHECK-NEXT: [5] - A57UnitW # CHECK-NEXT: [6] - A57UnitX - # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] -# CHECK-NEXT: 1.00 - - - - - - - +# CHECK-NEXT: 1.00 - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1.00 - - - - - - - b t - +# CHECK-NEXT: 1.00 - - - - - - - b t # CHECK: Timeline view: # CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeER . . . b t + # CHECK: [1,0] D=eER. . . b t + # CHECK: [2,0] D==eER . . b t + # CHECK: [3,0] .D==eER . . b t +# CHECK: [4,0] .D===eER . . b t + +# CHECK: [5,0] .D====eER . . b t + +# CHECK: [6,0] . D====eER. . b t + +# CHECK: [7,0] . D=====eER . b t + +# CHECK: [8,0] . D======eER. b t + +# CHECK: [9,0] . D======eER b t # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -58,4 +69,5 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 10 4.3 4.3 0.0 b t +# CHECK-NEXT: 0. 
10 4.3 4.3 0.0 b t + Index: test/tools/llvm-mca/AArch64/Exynos/direct-branch.s =================================================================== --- test/tools/llvm-mca/AArch64/Exynos/direct-branch.s +++ test/tools/llvm-mca/AArch64/Exynos/direct-branch.s @@ -1,19 +1,20 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=300 -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=300 -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=M1 - b t - -# ALL: Iterations: 300 -# ALL-NEXT: Instructions: 300 - -# M3-NEXT: Total Cycles: 51 -# M3-NEXT: Dispatch Width: 6 -# M3-NEXT: IPC: 5.88 - -# M1-NEXT: Total Cycles: 76 -# M1-NEXT: Dispatch Width: 4 -# M1-NEXT: IPC: 3.95 - + b t + +# M3: Iterations: 300 +# M3-NEXT: Instructions: 300 +# M3-NEXT: Total Cycles: 51 +# M3-NEXT: Dispatch Width: 6 +# M3-NEXT: IPC: 5.88 + +# M1: Iterations: 300 +# M1-NEXT: Instructions: 300 +# M1-NEXT: Total Cycles: 76 +# M1-NEXT: Dispatch Width: 4 +# M1-NEXT: IPC: 3.95 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps @@ -24,8 +25,134 @@ # ALL-NEXT: [6]: HasSideEffects # ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 0 - b t +# ALL-NEXT: 1 0 - b t +# M1: Resources: +# M1-NEXT: [0] - M1PipeF0 +# M1-NEXT: [1] - M1PipeF1 +# M1-NEXT: [2.0] - M1UnitA +# M1-NEXT: [2.1] - M1UnitA +# M1-NEXT: [3.0] - M1UnitB +# M1-NEXT: [3.1] - M1UnitB +# M1-NEXT: [4] - M1UnitC +# M1-NEXT: [5] - M1UnitD +# M1-NEXT: [6] - M1UnitFADD +# M1-NEXT: [7] - M1UnitFCVT +# M1-NEXT: [8] - M1UnitFMAC +# M1-NEXT: [9] - M1UnitFST +# M1-NEXT: [10] - M1UnitFVAR +# M1-NEXT: [11] - M1UnitL +# M1-NEXT: [12] - M1UnitNAL0 +# M1-NEXT: [13] - M1UnitNAL1 +# M1-NEXT: [14] - M1UnitNCRYPT +# M1-NEXT: [15] - M1UnitNMISC +# M1-NEXT: [16] - M1UnitS + +# M3: Resources: +# M3-NEXT: [0] - M3PipeF0 +# M3-NEXT: [1] - M3PipeF1 +# M3-NEXT: [2] - M3PipeF2 +# M3-NEXT: [3.0] - M3UnitA +# 
M3-NEXT: [3.1] - M3UnitA +# M3-NEXT: [4.0] - M3UnitB +# M3-NEXT: [4.1] - M3UnitB +# M3-NEXT: [5.0] - M3UnitC +# M3-NEXT: [5.1] - M3UnitC +# M3-NEXT: [6] - M3UnitD +# M3-NEXT: [7] - M3UnitFADD0 +# M3-NEXT: [8] - M3UnitFADD1 +# M3-NEXT: [9] - M3UnitFADD2 +# M3-NEXT: [10] - M3UnitFCVT0 +# M3-NEXT: [11] - M3UnitFCVT1 +# M3-NEXT: [12.0] - M3UnitFDIV0 +# M3-NEXT: [12.1] - M3UnitFDIV0 +# M3-NEXT: [13.0] - M3UnitFDIV1 +# M3-NEXT: [13.1] - M3UnitFDIV1 +# M3-NEXT: [14] - M3UnitFMAC0 +# M3-NEXT: [15] - M3UnitFMAC1 +# M3-NEXT: [16] - M3UnitFMAC2 +# M3-NEXT: [17.0] - M3UnitFSQR +# M3-NEXT: [17.1] - M3UnitFSQR +# M3-NEXT: [18] - M3UnitFST0 +# M3-NEXT: [19] - M3UnitFST1 +# M3-NEXT: [20.0] - M3UnitL +# M3-NEXT: [20.1] - M3UnitL +# M3-NEXT: [21] - M3UnitNALU0 +# M3-NEXT: [22] - M3UnitNALU1 +# M3-NEXT: [23] - M3UnitNALU2 +# M3-NEXT: [24] - M3UnitNCRY0 +# M3-NEXT: [25] - M3UnitNCRY1 +# M3-NEXT: [26] - M3UnitNMSC +# M3-NEXT: [27] - M3UnitNMUL +# M3-NEXT: [28] - M3UnitNSHF0 +# M3-NEXT: [29] - M3UnitNSHF1 +# M3-NEXT: [30] - M3UnitNSHF2 +# M3-NEXT: [31] - M3UnitNSHT0 +# M3-NEXT: [32] - M3UnitNSHT1 +# M3-NEXT: [33] - M3UnitNSHT2 +# M3-NEXT: [34] - M3UnitS + +# M1: Resource pressure per iteration: +# M1-NEXT: [0] [1] [2.0] [2.1] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] +# M1-NEXT: - - - - - - - - - - - - - - - - - - - + +# M3: Resource pressure per iteration: +# M3-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13.0] [13.1] [14] [15] [16] [17.0] [17.1] [18] [19] [20.0] [20.1] [21] [22] [23] [24] [25] [26] [27] [28] [29] [30] [31] [32] [33] [34] +# M3-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +# M1: Resource pressure by instruction: +# M1-NEXT: [0] [1] [2.0] [2.1] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# M1-NEXT: - - - - - - - - - - - - - - - - - - - b t + +# M3: Resource pressure by instruction: +# M3-NEXT: [0] [1] 
[2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13.0] [13.1] [14] [15] [16] [17.0] [17.1] [18] [19] [20.0] [20.1] [21] [22] [23] [24] [25] [26] [27] [28] [29] [30] [31] [32] [33] [34] Instructions: +# M3-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - b t + +# ALL: Timeline view: + +# M3: Index 012 + +# M1: Index 0123 + +# M1: [0,0] DR . b t + +# M3: [0,0] DR. b t + +# M1: [1,0] DR . b t + +# M3: [1,0] DR. b t + +# M1: [2,0] DR . b t + +# M3: [2,0] DR. b t + +# M1: [3,0] DR . b t + +# M3: [3,0] DR. b t + +# M1: [4,0] .DR. b t + +# M3: [4,0] DR. b t + +# M1: [5,0] .DR. b t + +# M3: [5,0] DR. b t + +# M3: [6,0] .DR b t + +# M1: [6,0] .DR. b t + +# M3: [7,0] .DR b t + +# M1: [7,0] .DR. b t + +# M1: [8,0] . DR b t + +# M3: [8,0] .DR b t + +# M1: [9,0] . DR b t + +# M3: [9,0] .DR b t # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -34,4 +161,5 @@ # ALL-NEXT: [3]: Average time elapsed from WB until retire stage # ALL: [0] [1] [2] [3] -# ALL-NEXT: 0. 10 0.0 0.0 0.0 b t +# ALL-NEXT: 0. 
10 0.0 0.0 0.0 b t + Index: test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s =================================================================== --- test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -1,7 +1,43 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -verbose < %s | FileCheck %s -check-prefix=ALL # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -verbose < %s | FileCheck %s -check-prefix=ALL - b t + b t + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 0 - b t + +# ALL: Dynamic Dispatch Stall Cycles: +# ALL-NEXT: RAT - Register unavailable: 0 +# ALL-NEXT: RCU - Retire tokens unavailable: 0 +# ALL-NEXT: SCHEDQ - Scheduler full: 0 +# ALL-NEXT: LQ - Load queue full: 0 +# ALL-NEXT: SQ - Store queue full: 0 +# ALL-NEXT: GROUP - Static restrictions on the dispatch group: 0 + +# ALL: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# ALL-NEXT: [# dispatched], [# cycles] +# ALL-NEXT: 0, 1 (50.0%) +# ALL-NEXT: 1, 1 (50.0%) + +# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL-NEXT: [# issued], [# cycles] +# ALL-NEXT: 0, 1 (50.0%) +# ALL-NEXT: 1, 1 (50.0%) + +# ALL: Retire Control Unit - number of cycles where we saw N instructions retired: +# ALL-NEXT: [# retired], [# cycles] +# ALL-NEXT: 0, 1 (50.0%) +# ALL-NEXT: 1, 1 (50.0%) # ALL: Scheduler's queue usage: # ALL-NEXT: No scheduler resources used. 
+ Index: test/tools/llvm-mca/X86/BtVer2/dot-product.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s vmulps %xmm0, %xmm1, %xmm2 @@ -10,7 +11,6 @@ # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 1.48 - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -24,7 +24,6 @@ # CHECK-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 - # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -43,7 +42,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - - +# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -51,7 +50,6 @@ # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4 - # CHECK: Timeline view: # CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 @@ -59,14 +57,15 @@ # CHECK: [0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 + # CHECK: [1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 + # CHECK: [2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [2,2] . 
D======eeeER vhaddps %xmm3, %xmm3, %xmm4 - # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -77,3 +76,4 @@ # CHECK-NEXT: 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 + Index: test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -1,8 +1,14 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %xmm1, %xmm2 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 11 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.18 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -18,6 +24,18 @@ # CHECK: Timeline view: # CHECK-NEXT: 0 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 + # CHECK: [0,0] DeER . . vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: [0,1] DeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: 1. 
1 1.0 0.0 0.0 vhaddps (%rdi), %xmm1, %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -1,8 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %ymm1, %ymm2 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.17 + # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -15,9 +22,20 @@ # CHECK-NEXT: 1 1 0.50 vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: 2 8 2.00 * vhaddps (%rdi), %ymm1, %ymm2 - # CHECK: Timeline view: # CHECK-NEXT: 01 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 + # CHECK: [0,0] DeER . .. vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: [0,1] .DeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: 1. 
1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2 + Index: test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED @@ -9,6 +10,12 @@ # DISABLED-NOT: Instruction Info: +# ENABLED: Iterations: 70 +# ENABLED-NEXT: Instructions: 210 +# ENABLED-NEXT: Total Cycles: 149 +# ENABLED-NEXT: Dispatch Width: 2 +# ENABLED-NEXT: IPC: 1.41 + # ENABLED: Instruction Info: # ENABLED-NEXT: [1]: #uOps # ENABLED-NEXT: [2]: Latency @@ -21,3 +28,4 @@ # ENABLED-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 # ENABLED-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 # ENABLED-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 + Index: test/tools/llvm-mca/X86/BtVer2/load-store-alias.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/load-store-alias.s +++ test/tools/llvm-mca/X86/BtVer2/load-store-alias.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s vmovaps (%rsi), %xmm0 @@ -9,14 +10,13 @@ vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) -# CHECK: Iterations: 100 +# CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 2403 # 
CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 0.33 - -# CHECK: Instruction Info: +# CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency # CHECK-NEXT: [3]: RThroughput @@ -24,18 +24,17 @@ # CHECK-NEXT: [5]: MayStore # CHECK-NEXT: [6]: HasSideEffects -# CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) - -# CHECK: Resources: +# CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 # CHECK-NEXT: [2] - JDiv @@ -51,12 +50,11 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL - -# CHECK: Resource pressure per iteration: +# CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 2.00 2.00 3.99 4.01 4.00 - 4.00 4.00 - - - +# CHECK-NEXT: - - - 2.00 2.00 3.99 4.01 4.00 - 4.00 4.00 - - - -# CHECK: Resource pressure by instruction: +# CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - 1.00 0.99 0.01 1.00 - - - - - - vmovaps (%rsi), %xmm0 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi) @@ -67,9 +65,8 @@ # 
CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi) - # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 0123456 # CHECK: [0,0] DeeeeeER . . . .. vmovaps (%rsi), %xmm0 @@ -81,7 +78,7 @@ # CHECK-NEXT: [0,6] . D===============eeeeeER. vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: [0,7] . D====================eER vmovaps %xmm0, 48(%rdi) -# CHECK: Average Wait times (based on the timeline view): +# CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready @@ -96,3 +93,4 @@ # CHECK-NEXT: 5. 1 16.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 16.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 1 21.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) + Index: test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s +++ test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s vmovaps (%rsi), %xmm0 @@ -9,15 +10,13 @@ vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) - -# CHECK: Iterations: 100 +# CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 408 # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 1.96 - -# CHECK: Instruction Info: +# CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency # CHECK-NEXT: [3]: RThroughput @@ -25,18 +24,17 @@ # CHECK-NEXT: [5]: MayStore # CHECK-NEXT: [6]: HasSideEffects -# CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 5 1.00 * vmovaps 
(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) - -# CHECK: Resources: +# CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 # CHECK-NEXT: [2] - JDiv @@ -52,12 +50,11 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL - -# CHECK: Resource pressure per iteration: +# CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - - +# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - - -# CHECK: Resource pressure by instruction: +# CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - 1.00 0.98 0.02 1.00 - - - - - - vmovaps (%rsi), %xmm0 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi) @@ -68,10 +65,9 @@ # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi) - -# CHECK: Timeline view: -# CHECK-NEXT: 01 -# CHECK-NEXT: Index 0123456789 +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeER .. 
vmovaps (%rsi), %xmm0 # CHECK-NEXT: [0,1] D=====eER .. vmovaps %xmm0, (%rdi) @@ -82,8 +78,7 @@ # CHECK-NEXT: [0,6] . DeeeeeER. vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: [0,7] . D=====eER vmovaps %xmm0, 48(%rdi) - -# CHECK: Average Wait times (based on the timeline view): +# CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready @@ -98,3 +93,4 @@ # CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) + Index: test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=2 < %s | FileCheck %s # VALU0/VALU1 @@ -16,14 +17,12 @@ vaddps %ymm0, %ymm1, %ymm2 vsqrtps %ymm0, %ymm2 - # CHECK: Iterations: 70 # CHECK-NEXT: Instructions: 560 # CHECK-NEXT: Total Cycles: 4416 # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 0.13 - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -42,7 +41,6 @@ # CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2 - # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -59,10 +57,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL - # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00 +# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00 + # CHECK: Resource pressure by instruction: # 
CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - 2.00 1.00 - - - - 0.03 0.97 2.00 vpmulld %xmm0, %xmm1, %xmm2 @@ -74,11 +72,10 @@ # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 - - # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + # CHECK: [0,0] DeeeeER . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,1] .DeE--R . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,2] . DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2 @@ -87,20 +84,20 @@ # CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm2 # CHECK-NEXT: [0,6] . DeeeE-----------------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: [0,7] . D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . vsqrtps %ymm0, %ymm2 + # CHECK: [1,0] . .DeeeeE--------------------------------------------------------R . vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,1] . . DeE-----------------------------------------------------------R. vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,2] . . DeeeE--------------------------------------------------------R. vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: [1,3] . . DeeE----------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,4] . . 
DeeeE--------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2 - # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage -# CHECK: [0] [1] [2] [3] +# CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2 @@ -109,3 +106,4 @@ # CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2 + Index: test/tools/llvm-mca/X86/BtVer2/read-advance-1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/read-advance-1.s +++ test/tools/llvm-mca/X86/BtVer2/read-advance-1.s @@ -1,19 +1,18 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s # The vmul can start executing 3cy in advance. That is beause the first use # operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is # evaluated before %xmm1. - vaddps %xmm0, %xmm0, %xmm1 vmulps (%rdi), %xmm1, %xmm2 - # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 10 # CHECK-NEXT: Dispatch Width: 2 - +# CHECK-NEXT: IPC: 0.20 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -27,14 +26,13 @@ # CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1 7 1.00 * vmulps (%rdi), %xmm1, %xmm2 - # CHECK: Timeline view: # CHECK: Index 0123456789 + # CHECK: [0,0] DeeeER . 
vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: [0,1] DeeeeeeeER vmulps (%rdi), %xmm1, %xmm2 - # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -44,3 +42,4 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/read-advance-2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -1,13 +1,19 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s imull %esi imull (%rdi) - # The second integer multiply can start at cycle 2 because the implicit reads # can start after the load operand is evaluated. -# CHECK: Instruction Info: +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 + +# CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency # CHECK-NEXT: [3]: RThroughput @@ -19,8 +25,20 @@ # CHECK-NEXT: 2 3 1.00 imull %esi # CHECK-NEXT: 2 6 1.00 * imull (%rdi) - # CHECK: Timeline view: + # CHECK: Index 0123456789 + # CHECK: [0,0] DeeeER . imull %esi # CHECK-NEXT: [0,1] .DeeeeeeER imull (%rdi) + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi +# CHECK-NEXT: 1. 
1 1.0 1.0 0.0 imull (%rdi) + Index: test/tools/llvm-mca/X86/BtVer2/register-files-1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/register-files-1.s +++ test/tools/llvm-mca/X86/BtVer2/register-files-1.s @@ -1,11 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=5 -verbose -register-file-stats -timeline < %s | FileCheck %s vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 +# CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 1 2 1.00 vmulps %xmm0, %xmm0, %xmm0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 @@ -15,10 +30,29 @@ # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 23 (82.1%) +# CHECK-NEXT: 2, 5 (17.9%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 18 (64.3%) +# CHECK-NEXT: 1, 10 (35.7%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 18 (64.3%) +# CHECK-NEXT: 1, 10 (35.7%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: JALU01, 0/20 +# CHECK-NEXT: JFPU01, 8/18 +# CHECK-NEXT: JLSAGU, 0/12 # CHECK: Register File statistics: -# CHECK-NEXT: 
Total number of mappings created: 10 -# CHECK-NEXT: Max number of mappings used: 10 +# CHECK-NEXT: Total number of mappings created: 10 +# CHECK-NEXT: Max number of mappings used: 10 # CHECK: * Register File #1 -- FpuPRF: # CHECK-NEXT: Number of physical registers: 72 @@ -30,17 +64,57 @@ # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL -# CHECK: Timeline view: -# CHECK-NEXT: 0123456789 +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 01234567 + # CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [0,1] D===eeER . . . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [1,0] .D====eeeER . . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [1,1] .D=======eeER . . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [2,0] . D========eeeER . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [2,1] . D===========eeER . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [3,0] . D============eeeER . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [3,1] . D===============eeER . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [4,0] . D================eeeER . 
vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [4,1] . D===================eeER vmulps %xmm0, %xmm0, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 5 9.0 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 1. 5 12.0 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 + Index: test/tools/llvm-mca/X86/BtVer2/register-files-2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/register-files-2.s +++ test/tools/llvm-mca/X86/BtVer2/register-files-2.s @@ -1,13 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=5 -verbose -register-file-stats -timeline < %s | FileCheck %s vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 +# CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects -# CHECK: Dynamic Dispatch Stall Cycles: +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 1 2 1.00 vmulps %xmm0, %xmm0, %xmm0 + +# CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 13 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 @@ -15,10 +30,30 @@ # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK: Dispatch Logic - 
number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 20 (71.4%) +# CHECK-NEXT: 2, 2 (7.1%) +# CHECK-NEXT: 1, 6 (21.4%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 18 (64.3%) +# CHECK-NEXT: 1, 10 (35.7%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 18 (64.3%) +# CHECK-NEXT: 1, 10 (35.7%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: JALU01, 0/20 +# CHECK-NEXT: JFPU01, 4/18 +# CHECK-NEXT: JLSAGU, 0/12 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 10 -# CHECK-NEXT: Max number of mappings used: 5 +# CHECK-NEXT: Total number of mappings created: 10 +# CHECK-NEXT: Max number of mappings used: 5 # CHECK: * Register File #1 -- FpuPRF: # CHECK-NEXT: Number of physical registers: 72 @@ -30,17 +65,57 @@ # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0 # CHECK: Timeline view: # CHECK-NEXT: 
0123456789 # CHECK-NEXT: Index 0123456789 01234567 + # CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [0,1] D===eeER . . . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [1,0] .D====eeeER . . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [1,1] .D=======eeER . . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [2,0] . D========eeeER . . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [2,1] . D========eeER . . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [3,0] . . D========eeeER . . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [3,1] . . D========eeER . . vmulps %xmm0, %xmm0, %xmm0 + # CHECK: [4,0] . . . D========eeeER . vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [4,1] . . . D========eeER vmulps %xmm0, %xmm0, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 5 6.6 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 1. 
5 7.8 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 + Index: test/tools/llvm-mca/X86/BtVer2/register-files-3.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/register-files-3.s +++ test/tools/llvm-mca/X86/BtVer2/register-files-3.s @@ -1,9 +1,13 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=2 -verbose -register-file-stats -timeline < %s | FileCheck %s idiv %eax # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 55 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -16,7 +20,6 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 2 25 25.00 * idivl %eax - # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 26 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 @@ -25,10 +28,29 @@ # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 53 (96.4%) +# CHECK-NEXT: 1, 2 (3.6%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 53 (96.4%) +# CHECK-NEXT: 1, 2 (3.6%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 53 (96.4%) +# CHECK-NEXT: 1, 2 (3.6%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: JALU01, 1/20 +# CHECK-NEXT: JFPU01, 0/18 +# CHECK-NEXT: JLSAGU, 0/12 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 6 -# CHECK-NEXT: Max number of mappings used: 3 +# CHECK-NEXT: Total number of mappings created: 6 +# CHECK-NEXT: Max number of mappings used: 3 # CHECK: * 
Register File #1 -- FpuPRF: # CHECK-NEXT: Number of physical registers: 72 @@ -40,10 +62,44 @@ # CHECK-NEXT: Total number of mappings created: 6 # CHECK-NEXT: Max number of mappings used: 3 +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - idivl %eax # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 01234 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 +# CHECK-NEXT: 0123456789 0123456789 01234 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax + # CHECK: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 
2 1.0 1.0 0.0 idivl %eax + Index: test/tools/llvm-mca/X86/BtVer2/register-files-4.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/register-files-4.s +++ test/tools/llvm-mca/X86/BtVer2/register-files-4.s @@ -1,9 +1,13 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=22 -verbose -register-file-stats -timeline -timeline-max-iterations=3 < %s | FileCheck %s idiv %eax # CHECK: Iterations: 22 # CHECK-NEXT: Instructions: 22 +# CHECK-NEXT: Total Cycles: 553 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -16,7 +20,6 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 2 25 25.00 * idivl %eax - # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 6 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 @@ -25,10 +28,29 @@ # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 531 (96.0%) +# CHECK-NEXT: 1, 22 (4.0%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 531 (96.0%) +# CHECK-NEXT: 1, 22 (4.0%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 531 (96.0%) +# CHECK-NEXT: 1, 22 (4.0%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: JALU01, 20/20 +# CHECK-NEXT: JFPU01, 0/18 +# CHECK-NEXT: JLSAGU, 0/12 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 66 -# CHECK-NEXT: Max number of mappings used: 63 +# CHECK-NEXT: Total number of mappings created: 66 +# CHECK-NEXT: Max number of mappings used: 63 # CHECK: * 
Register File #1 -- FpuPRF: # CHECK-NEXT: Number of physical registers: 72 @@ -40,10 +62,46 @@ # CHECK-NEXT: Total number of mappings created: 66 # CHECK-NEXT: Max number of mappings used: 63 +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - idivl %eax # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 01234567 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + # CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax + # CHECK: [1,0] .D========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax + # CHECK: [2,0] . D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 
3 25.0 0.3 0.0 idivl %eax + Index: test/tools/llvm-mca/X86/BtVer2/register-files-5.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -verbose -register-file-stats -timeline < %s | FileCheck %s vdivps %ymm0, %ymm0, %ymm1 @@ -34,14 +35,12 @@ vaddps %ymm3, %ymm0, %ymm5 vaddps %ymm3, %ymm0, %ymm6 - # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 33 # CHECK-NEXT: Total Cycles: 70 # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 0.47 - # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 8 @@ -50,10 +49,30 @@ # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 37 (52.9%) +# CHECK-NEXT: 1, 33 (47.1%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 37 (52.9%) +# CHECK-NEXT: 1, 33 (47.1%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 49 (70.0%) +# CHECK-NEXT: 2, 12 (17.1%) +# CHECK-NEXT: 1, 9 (12.9%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: JALU01, 0/20 +# CHECK-NEXT: JFPU01, 16/18 +# CHECK-NEXT: JLSAGU, 0/12 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 66 -# CHECK-NEXT: Max number of mappings used: 64 +# CHECK-NEXT: Total number of mappings created: 66 +# CHECK-NEXT: Max number of mappings used: 64 # CHECK: * Register File #1 
-- FpuPRF: # CHECK-NEXT: Number of physical registers: 72 @@ -65,7 +84,6 @@ # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 - # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 @@ -103,3 +121,45 @@ # CHECK-NEXT: [0,30] . . . . . . D==============================eeeER . vaddps %ymm3, %ymm0, %ymm4 # CHECK-NEXT: [0,31] . . . . . . .D===============================eeeER . vaddps %ymm3, %ymm0, %ymm5 # CHECK-NEXT: [0,32] . . . . . . . . D========================eeeER vaddps %ymm3, %ymm0, %ymm6 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 1. 1 1.0 1.0 34.0 vaddps %ymm0, %ymm0, %ymm2 +# CHECK-NEXT: 2. 1 2.0 2.0 33.0 vaddps %ymm0, %ymm0, %ymm3 +# CHECK-NEXT: 3. 1 3.0 3.0 31.0 vaddps %ymm0, %ymm0, %ymm4 +# CHECK-NEXT: 4. 1 4.0 4.0 30.0 vaddps %ymm0, %ymm0, %ymm5 +# CHECK-NEXT: 5. 1 5.0 5.0 28.0 vaddps %ymm0, %ymm0, %ymm6 +# CHECK-NEXT: 6. 1 6.0 6.0 27.0 vaddps %ymm0, %ymm0, %ymm7 +# CHECK-NEXT: 7. 1 7.0 7.0 25.0 vaddps %ymm0, %ymm0, %ymm8 +# CHECK-NEXT: 8. 1 8.0 8.0 24.0 vaddps %ymm0, %ymm0, %ymm9 +# CHECK-NEXT: 9. 1 9.0 9.0 22.0 vaddps %ymm0, %ymm0, %ymm10 +# CHECK-NEXT: 10. 1 10.0 10.0 21.0 vaddps %ymm0, %ymm0, %ymm11 +# CHECK-NEXT: 11. 1 11.0 11.0 19.0 vaddps %ymm0, %ymm0, %ymm12 +# CHECK-NEXT: 12. 1 12.0 12.0 18.0 vaddps %ymm0, %ymm0, %ymm13 +# CHECK-NEXT: 13. 1 13.0 13.0 16.0 vaddps %ymm0, %ymm0, %ymm14 +# CHECK-NEXT: 14. 1 14.0 14.0 15.0 vaddps %ymm0, %ymm0, %ymm15 +# CHECK-NEXT: 15. 1 15.0 15.0 13.0 vaddps %ymm2, %ymm0, %ymm0 +# CHECK-NEXT: 16. 
1 17.0 0.0 11.0 vaddps %ymm2, %ymm0, %ymm3 +# CHECK-NEXT: 17. 1 18.0 2.0 9.0 vaddps %ymm2, %ymm0, %ymm4 +# CHECK-NEXT: 18. 1 19.0 4.0 8.0 vaddps %ymm2, %ymm0, %ymm5 +# CHECK-NEXT: 19. 1 20.0 6.0 6.0 vaddps %ymm2, %ymm0, %ymm6 +# CHECK-NEXT: 20. 1 21.0 8.0 5.0 vaddps %ymm2, %ymm0, %ymm7 +# CHECK-NEXT: 21. 1 22.0 10.0 3.0 vaddps %ymm2, %ymm0, %ymm8 +# CHECK-NEXT: 22. 1 23.0 12.0 2.0 vaddps %ymm2, %ymm0, %ymm9 +# CHECK-NEXT: 23. 1 24.0 14.0 0.0 vaddps %ymm2, %ymm0, %ymm10 +# CHECK-NEXT: 24. 1 25.0 16.0 0.0 vaddps %ymm2, %ymm0, %ymm11 +# CHECK-NEXT: 25. 1 26.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm12 +# CHECK-NEXT: 26. 1 27.0 20.0 0.0 vaddps %ymm2, %ymm0, %ymm13 +# CHECK-NEXT: 27. 1 28.0 22.0 0.0 vaddps %ymm2, %ymm0, %ymm14 +# CHECK-NEXT: 28. 1 29.0 24.0 0.0 vaddps %ymm2, %ymm0, %ymm15 +# CHECK-NEXT: 29. 1 30.0 23.0 0.0 vaddps %ymm3, %ymm0, %ymm2 +# CHECK-NEXT: 30. 1 31.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4 +# CHECK-NEXT: 31. 1 32.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5 +# CHECK-NEXT: 32. 1 25.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm6 + Index: test/tools/llvm-mca/X86/BtVer2/resources-aes.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-aes.s +++ test/tools/llvm-mca/X86/BtVer2/resources-aes.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s aesdec %xmm0, %xmm2 @@ -18,6 +19,28 @@ aeskeygenassist $22, %xmm0, %xmm2 aeskeygenassist $22, (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 3 1.00 aesdec %xmm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * aesdec (%rax), %xmm2 +# CHECK-NEXT: 2 3 1.00 aesdeclast %xmm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * aesdeclast 
(%rax), %xmm2 +# CHECK-NEXT: 2 3 1.00 aesenc %xmm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * aesenc (%rax), %xmm2 +# CHECK-NEXT: 2 3 1.00 aesenclast %xmm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * aesenclast (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 aesimc %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * aesimc (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 aeskeygenassist $22, %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * aeskeygenassist $22, (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -34,6 +57,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - 12.00 - 6.00 - - - - - 12.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aesdec %xmm0, %xmm2 @@ -48,3 +75,4 @@ # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aesimc (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aeskeygenassist $22, %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aeskeygenassist $22, (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s vaddpd %xmm0, %xmm1, %xmm2 @@ -1008,7 +1009,6 @@ vzeroall vzeroupper - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -1702,7 +1702,6 @@ # CHECK-NEXT: 73 90 - * * * vzeroall # CHECK-NEXT: 37 46 - * * * vzeroupper - # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -1719,6 +1718,10 @@ # CHECK-NEXT: [12] - JVALU1 # 
CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 43.00 2.00 - 351.50 908.50 402.00 398.00 386.00 - 43.00 109.00 120.50 120.50 40.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 @@ -2404,3 +2407,4 @@ # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - - vzeroupper + Index: test/tools/llvm-mca/X86/BtVer2/resources-clmul.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-clmul.s +++ test/tools/llvm-mca/X86/BtVer2/resources-clmul.s @@ -1,8 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s pclmulqdq $11, %xmm0, %xmm2 pclmulqdq $11, (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 1.00 pclmulqdq $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pclmulqdq $11, (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -19,7 +32,12 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - 2.00 - 1.00 - - - - - 2.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - 
- - - 1.00 pclmulqdq $11, %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pclmulqdq $11, (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-f16c.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-f16c.s +++ test/tools/llvm-mca/X86/BtVer2/resources-f16c.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s vcvtph2ps %xmm0, %xmm2 @@ -12,6 +13,30 @@ vcvtps2ph $0, %ymm0, %xmm2 vcvtps2ph $0, %ymm0, (%rax) +# CHECK: Iterations: 70 +# CHECK-NEXT: Instructions: 560 +# CHECK-NEXT: Total Cycles: 1053 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.53 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 2 3 2.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 6 2.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 3 11 2.00 * vcvtps2ph $0, %ymm0, (%rax) + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -28,6 +53,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - - + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtph2ps %xmm0, %xmm2 @@ -38,3 +67,4 @@ # CHECK-NEXT: 
- - - - - - 1.00 - - 1.00 1.00 - - - vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.80 0.20 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2 # CHECK-NEXT: - - - 0.20 1.80 - 2.00 - - 1.00 2.00 - - - vcvtps2ph $0, %ymm0, (%rax) + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse1.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse1.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s addps %xmm0, %xmm2 @@ -134,7 +135,6 @@ xorps %xmm0, %xmm2 xorps (%rax), %xmm2 - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -236,7 +236,6 @@ # CHECK-NEXT: 1 1 0.50 xorps %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * xorps (%rax), %xmm2 - # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -253,8 +252,11 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 13.00 - - 44.50 183.50 37.50 47.50 42.00 - 7.00 15.00 1.00 1.00 - -# CHECK: Resource pressure by instruction: +# CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - addps (%rax), %xmm2 @@ -347,3 +349,4 @@ # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - unpcklps (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - xorps %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - xorps (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse2.s =================================================================== --- 
test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s addpd %xmm0, %xmm2 @@ -375,6 +376,270 @@ xorpd %xmm0, %xmm2 xorpd (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * addpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * addsd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * andnpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * andpd (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 cmppd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * cmppd $0, (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 cmpsd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * cmpsd $0, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 +# CHECK-NEXT: 1 8 1.00 * comisd (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %ecx +# CHECK-NEXT: 
2 7 1.00 cvtsd2si %xmm0, %rcx +# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 2 7 2.00 cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: 2 12 2.00 * cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 cvtsi2sdl %ecx, %xmm2 +# CHECK-NEXT: 2 9 1.00 cvtsi2sdq %rcx, %xmm2 +# CHECK-NEXT: 2 14 1.00 * cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 2 14 1.00 * cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 2 7 2.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 12 2.00 * cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 2 7 1.00 cvttsd2si %xmm0, %ecx +# CHECK-NEXT: 2 7 1.00 cvttsd2si %xmm0, %rcx +# CHECK-NEXT: 2 12 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 2 12 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1 19 19.00 divpd %xmm0, %xmm2 +# CHECK-NEXT: 1 24 19.00 * divpd (%rax), %xmm2 +# CHECK-NEXT: 1 19 19.00 divsd %xmm0, %xmm2 +# CHECK-NEXT: 1 24 19.00 * divsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 1 2 1.00 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * maxpd (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 maxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * maxsd (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 minpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * minpd (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 minsd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * minsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax) +# CHECK-NEXT: 1 5 1.00 * movapd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movd %eax, %xmm2 +# CHECK-NEXT: 1 5 1.00 * movd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movd %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax) +# CHECK-NEXT: 1 5 1.00 * movdqa (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movdqu %xmm0, %xmm2 +# 
CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax) +# CHECK-NEXT: 1 5 1.00 * movdqu (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 * movhpd %xmm0, (%rax) +# CHECK-NEXT: 1 6 1.00 * movhpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax) +# CHECK-NEXT: 1 6 1.00 * movlpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 movmskpd %xmm0, %ecx +# CHECK-NEXT: 1 2 1.00 * movntdq %xmm0, (%rax) +# CHECK-NEXT: 1 3 1.00 * movntpd %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 movq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 movq %rax, %xmm2 +# CHECK-NEXT: 1 5 1.00 * movq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movq %xmm0, %rcx +# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax) +# CHECK-NEXT: 1 5 1.00 * movsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax) +# CHECK-NEXT: 1 5 1.00 * movupd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 9 2.00 * mulpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 mulsd %xmm0, %xmm2 +# CHECK-NEXT: 1 9 2.00 * mulsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * orpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * packssdw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * packsswb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packuswb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * packuswb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddq (%rax), %xmm2 +# 
CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddusb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddusb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddusw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddusw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * paddw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pand %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pand (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pandn %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pandn (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pavgb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pavgb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pavgw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pavgw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pextrw $1, %xmm0, %ecx +# CHECK-NEXT: 1 2 1.00 pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxub (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminub (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 pmovmskb %xmm0, %ecx +# CHECK-NEXT: 1 2 1.00 pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmulhuw 
(%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmulhw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmulhw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmullw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmullw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmuludq %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * por (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psadbw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psadbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshufd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshufhw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pslld $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 pslld %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pslld (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psllq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psllw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psllw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrad $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrad %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psrad (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psraw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psraw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psraw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrld $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrld %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psrld (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psrlq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psrlw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubd 
%xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubusb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubusb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubusw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubusw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psubw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhbw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpckhbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhdq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpckhdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhqdq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhwd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpckhwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklbw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpcklbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckldq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpckldq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklqdq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklwd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * punpcklwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pxor %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pxor (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 27 27.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 1 32 27.00 * sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1 27 27.00 sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 1 32 27.00 * sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * subpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * subsd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 ucomisd %xmm0, %xmm1 +# CHECK-NEXT: 
1 8 1.00 * ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 unpckhpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * unpckhpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 unpcklpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * unpcklpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 xorpd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * xorpd (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -391,8 +656,11 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 16.00 2.00 - 46.50 202.50 114.50 127.50 114.00 - 12.00 43.00 66.00 66.00 10.00 -# CHECK: Resource pressure by instruction: +# CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addpd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - addpd (%rax), %xmm2 @@ -648,3 +916,4 @@ # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - unpcklpd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - xorpd %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - xorpd (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse3.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse3.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse3.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s addsubpd %xmm0, %xmm2 @@ -29,6 +30,35 @@ movsldup %xmm0, %xmm2 movsldup (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] 
Instructions: +# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * addsubps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 1 5 1.00 * lddqu (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * movshdup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * movsldup (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -45,6 +75,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 15.00 3.00 15.50 3.50 10.00 - - - 0.50 0.50 - + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addsubpd %xmm0, %xmm2 @@ -66,3 +100,4 @@ # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movshdup (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movsldup %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movsldup (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse41.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse41.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse41.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s blendpd $11, %xmm0, %xmm2 @@ -145,6 +146,112 @@ roundss $1, %xmm0, %xmm2 roundss $1, (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * blendps $11, (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 2.00 * blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 2.00 * blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 9 3.00 dppd $22, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 3.00 * dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 5 11 3.00 dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: 5 16 3.00 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 extractps $1, %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 * extractps $1, %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * insertps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 5 1.00 * movntdqa (%rax), %xmm2 +# CHECK-NEXT: 1 3 2.00 mpsadbw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * packusdw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 pblendvb %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 2.00 * pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pblendw $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pextrb $1, %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 * pextrb $1, %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 pextrd $1, 
%xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 * pextrd $1, %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 pextrq $1, %xmm0, %rcx +# CHECK-NEXT: 2 6 1.00 * pextrq $1, %xmm0, (%rax) +# CHECK-NEXT: 2 6 1.00 * pextrw $1, %xmm0, (%rax) +# CHECK-NEXT: 1 2 1.00 phminposuw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pinsrb $1, %eax, %xmm1 +# CHECK-NEXT: 1 6 1.00 * pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 pinsrd $1, %eax, %xmm1 +# CHECK-NEXT: 1 6 1.00 * pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 pinsrq $1, %rax, %xmm1 +# CHECK-NEXT: 1 6 1.00 * pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmaxud %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxud (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmaxuw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminsd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminud %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminud (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pminuw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pminuw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxbd (%rax), %xmm2 
+# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmuldq %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmuldq (%rax), %xmm2 +# CHECK-NEXT: 3 4 2.00 pmulld %xmm0, %xmm2 +# CHECK-NEXT: 3 9 2.00 * pmulld (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 1 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * roundss $1, (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -161,6 +268,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.00 - - 34.00 24.00 58.00 42.00 49.00 - 5.00 - 35.00 35.00 12.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - blendpd $11, %xmm0, %xmm2 @@ -259,3 +370,4 @@ # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2 + Index: 
test/tools/llvm-mca/X86/BtVer2/resources-sse42.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse42.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse42.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s crc32b %al, %ecx @@ -30,6 +31,36 @@ pcmpgtq %xmm0, %xmm2 pcmpgtq (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 3 3 2.00 crc32b %al, %ecx +# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %ecx +# CHECK-NEXT: 3 3 2.00 crc32l %eax, %ecx +# CHECK-NEXT: 3 6 2.00 * crc32l (%rax), %ecx +# CHECK-NEXT: 3 3 2.00 crc32w %ax, %ecx +# CHECK-NEXT: 3 6 2.00 * crc32w (%rax), %ecx +# CHECK-NEXT: 3 3 2.00 crc32b %al, %rcx +# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %rcx +# CHECK-NEXT: 3 3 2.00 crc32q %rax, %rcx +# CHECK-NEXT: 3 6 2.00 * crc32q (%rax), %rcx +# CHECK-NEXT: 9 14 5.00 pcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: 9 19 5.00 * pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 9 14 5.00 pcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 9 19 5.00 * pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3 7 2.00 pcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 12 2.00 * pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3 8 2.00 pcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 13 2.00 * pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtq %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pcmpgtq (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -46,6 +77,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 28.00 
20.00 - 8.00 - 1.00 9.00 18.00 - 8.00 - 13.00 37.00 - + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - crc32b %al, %ecx @@ -68,3 +103,4 @@ # CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - - - 2.00 - pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpgtq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pcmpgtq (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse4a.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse4a.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse4a.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s extrq %xmm0, %xmm2 @@ -9,6 +10,22 @@ movntsd %xmm0, (%rax) movntss %xmm0, (%rax) +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 extrq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 extrq $22, $2, %xmm2 +# CHECK-NEXT: 1 2 2.00 insertq %xmm0, %xmm2 +# CHECK-NEXT: 1 2 2.00 insertq $22, $22, %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 * movntsd %xmm0, (%rax) +# CHECK-NEXT: 1 3 1.00 * movntss %xmm0, (%rax) + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -25,6 +42,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - 2.00 4.00 - - 2.00 2.00 5.00 5.00 - + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] 
[12] [13] Instructions: # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - extrq %xmm0, %xmm2 @@ -33,3 +54,4 @@ # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 2.00 2.00 - insertq $22, $22, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntsd %xmm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntss %xmm0, (%rax) + Index: test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s +++ test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s palignr $1, %xmm0, %xmm2 @@ -39,6 +40,42 @@ psignw %xmm0, %xmm2 psignw (%rax), %xmm2 +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 pshufb %xmm0, 
%xmm2 +# CHECK-NEXT: 3 7 2.00 * pshufb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psignb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psignd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * psignw (%rax), %xmm2 + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -55,6 +92,10 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - 15.00 11.00 13.00 - - - 14.00 14.00 4.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - palignr $1, %xmm0, %xmm2 @@ -83,3 +124,4 @@ # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignd (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignw (%rax), %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -1,8 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -verbose < %s | FileCheck %s vmulps (%rsi), %xmm0, %xmm0 add %rsi, %rsi +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] 
[6] Instructions: +# CHECK-NEXT: 1 7 1.00 * vmulps (%rsi), %xmm0, %xmm0 +# CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi + +# CHECK: Dynamic Dispatch Stall Cycles: +# CHECK-NEXT: RAT - Register unavailable: 0 +# CHECK-NEXT: RCU - Retire tokens unavailable: 0 +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 +# CHECK-NEXT: LQ - Load queue full: 0 +# CHECK-NEXT: SQ - Store queue full: 0 +# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 + +# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 9 (90.0%) +# CHECK-NEXT: 2, 1 (10.0%) + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 9 (90.0%) +# CHECK-NEXT: 2, 1 (10.0%) + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 9 (90.0%) +# CHECK-NEXT: 2, 1 (10.0%) + # CHECK: Scheduler's queue usage: # CHECK-NEXT: JALU01, 1/20 # CHECK-NEXT: JFPU01, 1/18 @@ -26,9 +68,10 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - - +# CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vmulps (%rsi), %xmm0, %xmm0 # CHECK-NEXT: - 1.00 - - - - - - - - - - - - addq %rsi, %rsi + Index: test/tools/llvm-mca/X86/BtVer2/simple-test.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/simple-test.s +++ test/tools/llvm-mca/X86/BtVer2/simple-test.s @@ -1,15 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 < %s | FileCheck %s 
add %edi, %eax -# CHECK: Iterations: 100 +# CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 103 # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 0.97 - -# CHECK: Instruction Info: +# CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency # CHECK-NEXT: [3]: RThroughput @@ -17,11 +17,10 @@ # CHECK-NEXT: [5]: MayStore # CHECK-NEXT: [6]: HasSideEffects -# CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 0.50 addl %edi, %eax +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 addl %edi, %eax - -# CHECK-LABEL: Resources: +# CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 # CHECK-NEXT: [2] - JDiv @@ -37,11 +36,11 @@ # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL - -# CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - -# CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax + Index: test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s +++ test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s @@ -1,8 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vaddps %xmm0, %xmm0, %xmm1 vandps 
(%rdi), %xmm1, %xmm2 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.22 + # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -15,8 +22,20 @@ # CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1 6 1.00 * vandps (%rdi), %xmm1, %xmm2 - # CHECK: Timeline view: + # CHECK: Index 012345678 + # CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: [0,1] DeeeeeeER vandps (%rdi), %xmm1, %xmm2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2 + Index: test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s +++ test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s @@ -1,8 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vaddps %ymm0, %ymm0, %ymm1 vandps (%rdi), %ymm1, %ymm2 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 + # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -15,8 +22,20 @@ # CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 2 6 2.00 * vandps (%rdi), %ymm1, %ymm2 - # CHECK: Timeline view: + # CHECK: Index 0123456789 + # CHECK: [0,0] DeeeER . 
vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: [0,1] .DeeeeeeER vandps (%rdi), %ymm1, %ymm2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vandps (%rdi), %ymm1, %ymm2 + Index: utils/update_mca_test_checks.py =================================================================== --- /dev/null +++ utils/update_mca_test_checks.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python2.7 + +"""A test case update script. + +This script is a utility to update LLVM 'llvm-mca' based test cases with new +FileCheck patterns. +""" + +import argparse +from collections import defaultdict +import glob +import os +import sys +import warnings + +from UpdateTestChecks import common + + +COMMENT_CHAR = '#' +ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format( + COMMENT_CHAR) +ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__)) + + +class Error(Exception): + """ Generic Error to be raised without printing a traceback. + """ + pass + + +def _warn(msg): + """ Log a user warning to stderr. + """ + warnings.warn(msg, Warning, stacklevel=2) + + +def _configure_warnings(args): + warnings.resetwarnings() + if args.w: + warnings.simplefilter('ignore') + if args.Werror: + warnings.simplefilter('error') + + +def _showwarning(message, category, filename, lineno, file=None, line=None): + """ Version of warnings.showwarning that won't attempt to print out the + line at the location of the warning if the line text is not explicitly + specified. 
+ """ + if file is None: + file = sys.stderr + if line is None: + line = '' + file.write(warnings.formatwarning(message, category, filename, lineno, line)) + + +def _parse_args(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('-v', '--verbose', + action='store_true', + help='show verbose output') + parser.add_argument('-w', + action='store_true', + help='suppress warnings') + parser.add_argument('-Werror', + action='store_true', + help='promote warnings to errors') + parser.add_argument('--llvm-mca-binary', + metavar='', + default='llvm-mca', + help='the binary to use to generate the test case ' + '(default: llvm-mca)') + parser.add_argument('tests', + metavar='', + nargs='+') + args = parser.parse_args() + + _configure_warnings(args) + + if os.path.basename(args.llvm_mca_binary) != 'llvm-mca': + _warn('unexpected binary name: {}'.format(args.llvm_mca_binary)) + + return args + + +def _find_run_lines(input_lines, args): + raw_lines = [m.group(1) + for m in [common.RUN_LINE_RE.match(l) for l in input_lines] + if m] + run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] + for l in raw_lines[1:]: + if run_lines[-1].endswith('\\'): + run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l + else: + run_lines.append(l) + + if args.verbose: + sys.stderr.write('Found {} RUN line{}:\n'.format( + len(run_lines), '' if len(run_lines) == 1 else 's')) + for line in run_lines: + sys.stderr.write(' RUN: {}\n'.format(line)) + + return run_lines + + +def _get_run_infos(run_lines, args): + run_infos = [] + for run_line in run_lines: + try: + (tool_cmd, filecheck_cmd) = tuple([cmd.strip() + for cmd in run_line.split('|', 1)]) + except ValueError: + _warn('could not split tool and filecheck commands: {}'.format(run_line)) + continue + + tool_basename = os.path.basename(args.llvm_mca_binary) + + if not tool_cmd.startswith(tool_basename + ' '): + _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line)) + continue + + if not
filecheck_cmd.startswith('FileCheck '): + _warn('skipping non-FileCheck RUN line: {}'.format(run_line)) + continue + + tool_cmd_args = tool_cmd[len(tool_basename):].strip() + tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip() + + check_prefixes = [item + for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd) + for item in m.group(1).split(',')] + if not check_prefixes: + check_prefixes = ['CHECK'] + + run_infos.append((check_prefixes, tool_cmd_args)) + + return run_infos + + +def _get_block_infos(run_infos, test_path, args): # noqa + """ For each run line, run the tool with the specified args and collect the + output. We use the concept of 'blocks' for uniquing, where a block is + a series of lines of text with no more than one newline character between + each one. For example: + + This + is + one + block + + This is + another block + + This is yet another block + + We then build up a 'block_infos' structure containing a dict where the + text of each block is the key and a list of the sets of prefixes that may + generate that particular block. This then goes through a series of + transformations to minimise the amount of CHECK lines that need to be + written by taking advantage of common prefixes. + """ + + def _block_key(tool_args, prefixes): + """ Get a hashable key based on the current tool_args and prefixes. + """ + return ' '.join([tool_args] + prefixes) + + all_blocks = {} + max_block_len = 0 + + # Run the tool for each run line to generate all of the blocks. + for prefixes, tool_args in run_infos: + key = _block_key(tool_args, prefixes) + raw_tool_output = common.invoke_tool(args.llvm_mca_binary, + tool_args, + test_path) + + # Replace any lines consisting of purely whitespace with empty lines. 
+ raw_tool_output = '\n'.join(line if line.strip() else '' + for line in raw_tool_output.splitlines()) + + # Split blocks, stripping all trailing whitespace, but keeping preceding + # whitespace except for newlines so that columns will line up visually. + all_blocks[key] = [b.lstrip('\n').rstrip() + for b in raw_tool_output.split('\n\n')] + max_block_len = max(max_block_len, len(all_blocks[key])) + + # If necessary, pad the lists of blocks with empty blocks so that they are + # all the same length. + for key in all_blocks: + len_to_pad = max_block_len - len(all_blocks[key]) + all_blocks[key] += [''] * len_to_pad + + # Create the block_infos structure where it is a nested dict in the form of: + # block number -> block text -> list of prefix sets + block_infos = defaultdict(lambda: defaultdict(list)) + for prefixes, tool_args in run_infos: + key = _block_key(tool_args, prefixes) + for block_num, block_text in enumerate(all_blocks[key]): + block_infos[block_num][block_text].append(set(prefixes)) + + # Now go through the block_infos structure and attempt to smartly prune the + # number of prefixes per block to the minimal set possible to output. + for block_num in range(len(block_infos)): + + # When there are multiple block texts for a block num, remove any + # prefixes that are common to more than one of them. + # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ] + all_sets = [s for s in block_infos[block_num].values()] + pruned_sets = [] + + for i, setlist in enumerate(all_sets): + other_set_values = set([elem for j, setlist2 in enumerate(all_sets) + for set_ in setlist2 for elem in set_ + if i != j]) + pruned_sets.append([s - other_set_values for s in setlist]) + + for i, block_text in enumerate(block_infos[block_num]): + + # When a block text matches multiple sets of prefixes, try removing any + # prefixes that aren't common to all of them. + # E.g. 
[ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}] + common_values = pruned_sets[i][0].copy() + for s in pruned_sets[i][1:]: + common_values &= s + if common_values: + pruned_sets[i] = [common_values] + + # Everything should be uniqued as much as possible by now. Apply the + # newly pruned sets to the block_infos structure. + # If there are any blocks of text that still match multiple prefixes, + # output a warning. + current_set = set() + for s in pruned_sets[i]: + s = sorted(list(s)) + if s: + current_set.add(s[0]) + if len(s) > 1: + _warn('Multiple prefixes generating same output: {} ' + '(discarding {})'.format(','.join(s), ','.join(s[1:]))) + + block_infos[block_num][block_text] = sorted(list(current_set)) + + return block_infos + + +def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa + args): + prefix_set = set([prefix for prefixes, _ in prefix_list + for prefix in prefixes]) + not_prefix_set = set() + + output_lines = [ADVERT] + for input_line in input_lines: + if input_line.startswith(ADVERT_PREFIX): + continue + + if input_line.startswith(COMMENT_CHAR): + m = common.CHECK_RE.match(input_line) + try: + prefix = m.group(1) + except AttributeError: + prefix = None + + if '{}-NOT:'.format(prefix) in input_line: + not_prefix_set.add(prefix) + + if prefix not in prefix_set or prefix in not_prefix_set: + output_lines.append(input_line) + continue + + if common.should_add_line_to_output(input_line, prefix_set): + # This input line of the function body will go as-is into the output. + # Except make leading whitespace uniform: 2 spaces. + input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line) + + # Skip empty lines if the previous output line is also empty. + if input_line or output_lines[-1]: + output_lines.append(input_line) + else: + continue + + # Add a blank line before the new checks if required. 
+ if output_lines[-1]: + output_lines.append('') + + for block_num in range(len(block_infos)): + for block_text in sorted(block_infos[block_num]): + if not block_text: + continue + + if block_infos[block_num][block_text]: + lines = block_text.split('\n') + for prefix in block_infos[block_num][block_text]: + if prefix in not_prefix_set: + _warn('not writing for prefix {0} due to presence of "{0}-NOT:" ' + 'in input file.'.format(prefix)) + continue + + output_lines.append( + '{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0])) + for line in lines[1:]: + output_lines.append( + '{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line)) + output_lines.append('') + + if args.verbose: + sys.stderr.write( + 'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path)) + + with open(test_path, 'wb') as f: + for line in output_lines: + f.write('{}\n'.format(line).encode()) + + +def main(): + args = _parse_args() + test_paths = [test for pattern in args.tests for test in glob.glob(pattern)] + for test_path in test_paths: + sys.stderr.write('Test path: {}\n'.format(test_path)) + # Call this per test. By default each warning will only be written once + # per source location. Reset the warning filter so that now each warning + # will be written once per source location per test. 
+ _configure_warnings(args) + + if args.verbose: + sys.stderr.write( + 'Scanning for RUN lines in test file: {}\n'.format(test_path)) + + if not os.path.isfile(test_path): + raise Error('could not find test file: {}'.format(test_path)) + + with open(test_path) as f: + input_lines = [l.rstrip() for l in f] + + run_lines = _find_run_lines(input_lines, args) + run_infos = _get_run_infos(run_lines, args) + block_infos = _get_block_infos(run_infos, test_path, args) + _write_output(test_path, input_lines, run_infos, block_infos, args) + + return 0 + + +if __name__ == '__main__': + try: + warnings.showwarning = _showwarning + sys.exit(main()) + except Error as e: + sys.stdout.write('error: {}\n'.format(e)) + sys.exit(1)