Index: llvm/trunk/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s +++ llvm/trunk/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s @@ -3,11 +3,12 @@ b t -# CHECK: Iterations: 600 -# CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 603 -# CHECK-NEXT: Dispatch Width: 3 -# CHECK-NEXT: IPC: 1.00 +# CHECK: Iterations: 600 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 603 +# CHECK-NEXT: Dispatch Width: 3 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s +++ llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s @@ -4,17 +4,18 @@ b t -# M3: Iterations: 300 -# M3-NEXT: Instructions: 300 -# M3-NEXT: Total Cycles: 51 -# M3-NEXT: Dispatch Width: 6 -# M3-NEXT: IPC: 5.88 - -# M1: Iterations: 300 -# M1-NEXT: Instructions: 300 -# M1-NEXT: Total Cycles: 76 -# M1-NEXT: Dispatch Width: 4 -# M1-NEXT: IPC: 3.95 +# ALL: Iterations: 300 +# ALL-NEXT: Instructions: 300 + +# M1-NEXT: Total Cycles: 76 +# M1-NEXT: Dispatch Width: 4 +# M1-NEXT: IPC: 3.95 +# M1-NEXT: Block RThroughput: 0.3 + +# M3-NEXT: Total Cycles: 51 +# M3-NEXT: Dispatch Width: 6 +# M3-NEXT: IPC: 5.88 +# M3-NEXT: Block RThroughput: 0.2 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -1,9 +1,21 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca 
-march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M1 b t +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 1 +# ALL-NEXT: Total Cycles: 2 + +# M1-NEXT: Dispatch Width: 4 +# M3-NEXT: Dispatch Width: 6 + +# ALL-NEXT: IPC: 0.50 + +# M1-NEXT: Block RThroughput: 0.3 +# M3-NEXT: Block RThroughput: 0.2 + # ALL: Schedulers - number of cycles where we saw N instructions issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 1 (50.0%) Index: llvm/trunk/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s +++ llvm/trunk/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s @@ -3,11 +3,12 @@ stp d0, d1, [x0] -# CHECK: Iterations: 2 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 8 -# CHECK-NEXT: IPC: 0.50 +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 8 +# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s +++ llvm/trunk/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s @@ 
-3,11 +3,12 @@ vadd.f32 s0, s2, s2 -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 100 -# CHECK-NEXT: Total Cycles: 105 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.95 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 100 +# CHECK-NEXT: Total Cycles: 105 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.95 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -10,23 +10,24 @@ # DISABLED-NOT: Instruction Info: -# ENABLED: Iterations: 100 -# ENABLED-NEXT: Instructions: 300 -# ENABLED-NEXT: Total Cycles: 209 -# ENABLED-NEXT: Dispatch Width: 2 -# ENABLED-NEXT: IPC: 1.44 -# ENABLED-NEXT: Block RThroughput: 2.0 - -# ENABLED: Instruction Info: -# ENABLED-NEXT: [1]: #uOps -# ENABLED-NEXT: [2]: Latency -# ENABLED-NEXT: [3]: RThroughput -# ENABLED-NEXT: [4]: MayLoad -# ENABLED-NEXT: [5]: MayStore -# ENABLED-NEXT: [6]: HasSideEffects - -# ENABLED: [1] [2] [3] [4] [5] [6] Instructions: -# ENABLED-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 -# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 -# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 + +# ENABLED: Iterations: 100 +# ENABLED-NEXT: Instructions: 300 +# ENABLED-NEXT: Total Cycles: 209 +# ENABLED-NEXT: Dispatch Width: 2 +# ENABLED-NEXT: IPC: 1.44 +# ENABLED-NEXT: Block RThroughput: 2.0 + +# ENABLED: Instruction Info: +# ENABLED-NEXT: [1]: #uOps +# ENABLED-NEXT: [2]: Latency +# ENABLED-NEXT: [3]: RThroughput +# ENABLED-NEXT: [4]: MayLoad +# ENABLED-NEXT: [5]: MayStore +# ENABLED-NEXT: [6]: HasSideEffects + +# ENABLED: [1] [2] [3] [4] [5] [6] Instructions: +# ENABLED-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 +# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 +# ENABLED-NEXT: 1 3 
1.00 vhaddps %xmm3, %xmm3, %xmm4 Index: llvm/trunk/test/tools/llvm-mca/X86/bextr-read-after-ld.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ llvm/trunk/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -8,81 +8,69 @@ add %edi, %esi bextrl %esi, (%rdi), %eax -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 10 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.20 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 10 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.20 - -# BTVER2: Iterations: 1 -# BTVER2-NEXT: Instructions: 2 -# BTVER2-NEXT: Total Cycles: 7 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.29 - -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 8 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.25 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects - -# ZNVER1: [1] [2] [3] [4] [5] [6] Instructions: -# ZNVER1-NEXT: 1 1 0.25 addl %edi, %esi -# ZNVER1-NEXT: 2 5 0.50 * bextrl %esi, (%rdi), %eax - -# BDWELL: [1] [2] [3] [4] [5] [6] Instructions: -# BDWELL-NEXT: 1 1 0.25 addl %edi, %esi -# BDWELL-NEXT: 3 7 0.50 * bextrl %esi, (%rdi), %eax +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 + +# BDWELL-NEXT: Total Cycles: 10 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 1.0 + +# BTVER2-NEXT: Total Cycles: 7 +# BTVER2-NEXT: Dispatch Width: 2 +# BTVER2-NEXT: IPC: 0.29 +# BTVER2-NEXT: Block RThroughput: 1.0 + +# HASWELL-NEXT: Total Cycles: 10 +# HASWELL-NEXT: Dispatch Width: 4 +# 
HASWELL-NEXT: IPC: 0.20 +# HASWELL-NEXT: Block RThroughput: 1.0 + +# SKYLAKE-NEXT: Total Cycles: 10 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 0.20 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + +# ZNVER1-NEXT: Total Cycles: 8 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.25 +# ZNVER1-NEXT: Block RThroughput: 0.8 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# BDWELL-NEXT: 1 1 0.25 addl %edi, %esi +# BDWELL-NEXT: 3 7 0.50 * bextrl %esi, (%rdi), %eax + +# BTVER2-NEXT: 1 1 0.50 addl %edi, %esi +# BTVER2-NEXT: 1 4 1.00 * bextrl %esi, (%rdi), %eax -# HASWELL: [1] [2] [3] [4] [5] [6] Instructions: # HASWELL-NEXT: 1 1 0.25 addl %edi, %esi # HASWELL-NEXT: 3 7 0.50 * bextrl %esi, (%rdi), %eax -# SKYLAKE: [1] [2] [3] [4] [5] [6] Instructions: # SKYLAKE-NEXT: 1 1 0.25 addl %edi, %esi # SKYLAKE-NEXT: 3 7 0.50 * bextrl %esi, (%rdi), %eax -# BTVER2: [1] [2] [3] [4] [5] [6] Instructions: -# BTVER2-NEXT: 1 1 0.50 addl %edi, %esi -# BTVER2-NEXT: 1 4 1.00 * bextrl %esi, (%rdi), %eax +# ZNVER1-NEXT: 1 1 0.25 addl %edi, %esi +# ZNVER1-NEXT: 2 5 0.50 * bextrl %esi, (%rdi), %eax -# BTVER2: Timeline view: -# BTVER2-NEXT: Index 0123456 +# ALL: Timeline view: -# ZNVER1: Timeline view: -# ZNVER1-NEXT: Index 01234567 - -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 0123456789 - -# HASWELL: Timeline view: +# BDWELL-NEXT: Index 0123456789 +# BTVER2-NEXT: Index 0123456 # HASWELL-NEXT: Index 0123456789 - -# SKYLAKE: Timeline view: # SKYLAKE-NEXT: Index 0123456789 +# ZNVER1-NEXT: Index 01234567 -# BDWELL: [0,0] DeER . . addl %edi, %esi -# BDWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax +# BDWELL: [0,0] DeER . . addl %edi, %esi +# BDWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax # HASWELL: [0,0] DeER . . 
addl %edi, %esi # HASWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax @@ -90,19 +78,19 @@ # SKYLAKE: [0,0] DeER . . addl %edi, %esi # SKYLAKE-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax -# ZNVER1: [0,0] DeER . . addl %edi, %esi -# ZNVER1-NEXT: [0,1] DeeeeeER bextrl %esi, (%rdi), %eax +# ZNVER1: [0,0] DeER . . addl %edi, %esi +# ZNVER1-NEXT: [0,1] DeeeeeER bextrl %esi, (%rdi), %eax -# BTVER2: [0,0] DeER .. addl %edi, %esi -# BTVER2-NEXT: [0,1] DeeeeER bextrl %esi, (%rdi), %eax +# BTVER2: [0,0] DeER .. addl %edi, %esi +# BTVER2-NEXT: [0,1] DeeeeER bextrl %esi, (%rdi), %eax -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# ALL: [0] [1] [2] [3] -# ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi -# ALL-NEXT: 1. 1 1.0 0.0 0.0 bextrl %esi, (%rdi), %eax +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi +# ALL-NEXT: 1. 
1 1.0 0.0 0.0 bextrl %esi, (%rdi), %eax Index: llvm/trunk/test/tools/llvm-mca/X86/bzhi-read-after-ld.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ llvm/trunk/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -7,68 +7,54 @@ add %edi, %esi bzhil %esi, (%rdi), %eax -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 8 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.25 - -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 9 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.22 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 9 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.22 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 9 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.22 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects - -# ZNVER1: [1] [2] [3] [4] [5] [6] Instructions: -# ZNVER1-NEXT: 1 1 0.25 addl %edi, %esi -# ZNVER1-NEXT: 2 5 0.50 * bzhil %esi, (%rdi), %eax - -# BDWELL: [1] [2] [3] [4] [5] [6] Instructions: -# BDWELL-NEXT: 1 1 0.25 addl %edi, %esi -# BDWELL-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 -# HASWELL: [1] [2] [3] [4] [5] [6] Instructions: -# HASWELL-NEXT: 1 1 0.25 addl %edi, %esi -# HASWELL-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# BDWELL-NEXT: Total Cycles: 9 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.22 +# BDWELL-NEXT: Block RThroughput: 0.8 + +# HASWELL-NEXT: Total Cycles: 9 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.22 +# HASWELL-NEXT: Block RThroughput: 0.8 + +# SKYLAKE-NEXT: Total Cycles: 9 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 
0.22 +# SKYLAKE-NEXT: Block RThroughput: 0.5 + +# ZNVER1-NEXT: Total Cycles: 8 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.25 +# ZNVER1-NEXT: Block RThroughput: 0.8 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects -# SKYLAKE: [1] [2] [3] [4] [5] [6] Instructions: -# SKYLAKE-NEXT: 1 1 0.25 addl %edi, %esi -# SKYLAKE-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.25 addl %edi, %esi -# ZNVER1: Timeline view: -# ZNVER1-NEXT: Index 01234567 +# BDWELL-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# HASWELL-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# SKYLAKE-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax +# ZNVER1-NEXT: 2 5 0.50 * bzhil %esi, (%rdi), %eax -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 012345678 +# ALL: Timeline view: -# HASWELL: Timeline view: +# BDWELL-NEXT: Index 012345678 # HASWELL-NEXT: Index 012345678 - -# SKYLAKE: Timeline view: # SKYLAKE-NEXT: Index 012345678 +# ZNVER1-NEXT: Index 01234567 -# BDWELL: [0,0] DeER . . addl %edi, %esi -# BDWELL-NEXT: [0,1] DeeeeeeER bzhil %esi, (%rdi), %eax +# BDWELL: [0,0] DeER . . addl %edi, %esi +# BDWELL-NEXT: [0,1] DeeeeeeER bzhil %esi, (%rdi), %eax # HASWELL: [0,0] DeER . . addl %edi, %esi # HASWELL-NEXT: [0,1] DeeeeeeER bzhil %esi, (%rdi), %eax @@ -76,16 +62,16 @@ # SKYLAKE: [0,0] DeER . . addl %edi, %esi # SKYLAKE-NEXT: [0,1] DeeeeeeER bzhil %esi, (%rdi), %eax -# ZNVER1: [0,0] DeER . . addl %edi, %esi -# ZNVER1-NEXT: [0,1] DeeeeeER bzhil %esi, (%rdi), %eax +# ZNVER1: [0,0] DeER . . 
addl %edi, %esi +# ZNVER1-NEXT: [0,1] DeeeeeER bzhil %esi, (%rdi), %eax -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# ALL: [0] [1] [2] [3] -# ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi -# ALL-NEXT: 1. 1 1.0 0.0 0.0 bzhil %esi, (%rdi), %eax +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi +# ALL-NEXT: 1. 1 1.0 0.0 0.0 bzhil %esi, (%rdi), %eax Index: llvm/trunk/test/tools/llvm-mca/X86/cpus.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/cpus.s +++ llvm/trunk/test/tools/llvm-mca/X86/cpus.s @@ -12,63 +12,31 @@ add %edi, %eax -# BTVER2: Iterations: 100 -# BTVER2-NEXT: Instructions: 100 -# BTVER2-NEXT: Total Cycles: 103 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.97 - -# SLM: Iterations: 100 -# SLM-NEXT: Instructions: 100 -# SLM-NEXT: Total Cycles: 103 -# SLM-NEXT: Dispatch Width: 2 -# SLM-NEXT: IPC: 0.97 - -# BROADWELL: Iterations: 100 -# BROADWELL-NEXT: Instructions: 100 -# BROADWELL-NEXT: Total Cycles: 103 -# BROADWELL-NEXT: Dispatch Width: 4 -# BROADWELL-NEXT: IPC: 0.97 - -# HASWELL: Iterations: 100 -# HASWELL-NEXT: Instructions: 100 -# HASWELL-NEXT: Total Cycles: 103 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.97 - -# IVYBRIDGE: Iterations: 100 -# IVYBRIDGE-NEXT: Instructions: 100 -# IVYBRIDGE-NEXT: Total Cycles: 103 -# IVYBRIDGE-NEXT: Dispatch Width: 4 -# IVYBRIDGE-NEXT: IPC: 0.97 - -# KNL: Iterations: 100 
-# KNL-NEXT: Instructions: 100 -# KNL-NEXT: Total Cycles: 103 -# KNL-NEXT: Dispatch Width: 4 -# KNL-NEXT: IPC: 0.97 - -# SANDYBRIDGE: Iterations: 100 -# SANDYBRIDGE-NEXT: Instructions: 100 -# SANDYBRIDGE-NEXT: Total Cycles: 103 -# SANDYBRIDGE-NEXT: Dispatch Width: 4 -# SANDYBRIDGE-NEXT: IPC: 0.97 - -# ZNVER1: Iterations: 100 -# ZNVER1-NEXT: Instructions: 100 -# ZNVER1-NEXT: Total Cycles: 103 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.97 - -# SKX: Iterations: 100 -# SKX-NEXT: Instructions: 100 -# SKX-NEXT: Total Cycles: 103 -# SKX-NEXT: Dispatch Width: 6 -# SKX-NEXT: IPC: 0.97 - -# SKX-AVX512: Iterations: 100 -# SKX-AVX512-NEXT: Instructions: 100 -# SKX-AVX512-NEXT: Total Cycles: 103 -# SKX-AVX512-NEXT: Dispatch Width: 6 -# SKX-AVX512-NEXT: IPC: 0.97 +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 + +# BROADWELL-NEXT: Dispatch Width: 4 +# BTVER2-NEXT: Dispatch Width: 2 +# HASWELL-NEXT: Dispatch Width: 4 +# IVYBRIDGE-NEXT: Dispatch Width: 4 +# KNL-NEXT: Dispatch Width: 4 +# SANDYBRIDGE-NEXT: Dispatch Width: 4 +# SKX-NEXT: Dispatch Width: 6 +# SKX-AVX512-NEXT: Dispatch Width: 6 +# SLM-NEXT: Dispatch Width: 2 +# ZNVER1-NEXT: Dispatch Width: 4 + +# ALL-NEXT: IPC: 0.97 + +# BROADWELL-NEXT: Block RThroughput: 0.3 +# BTVER2-NEXT: Block RThroughput: 0.5 +# HASWELL-NEXT: Block RThroughput: 0.3 +# IVYBRIDGE-NEXT: Block RThroughput: 0.3 +# KNL-NEXT: Block RThroughput: 0.3 +# SANDYBRIDGE-NEXT: Block RThroughput: 0.3 +# SKX-NEXT: Block RThroughput: 0.3 +# SKX-AVX512-NEXT: Block RThroughput: 0.3 +# SLM-NEXT: Block RThroughput: 0.5 +# ZNVER1-NEXT: Block RThroughput: 0.3 Index: llvm/trunk/test/tools/llvm-mca/X86/default-iterations.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/default-iterations.s +++ llvm/trunk/test/tools/llvm-mca/X86/default-iterations.s @@ -5,26 +5,28 @@ add %eax, %eax -# CUSTOM: Iterations: 1 -# CUSTOM-NEXT: Instructions: 1 -# CUSTOM-NEXT: Total 
Cycles: 4 -# CUSTOM-NEXT: Dispatch Width: 2 -# CUSTOM-NEXT: IPC: 0.25 - -# DEFAULT: Iterations: 100 -# DEFAULT-NEXT: Instructions: 100 -# DEFAULT-NEXT: Total Cycles: 103 -# DEFAULT-NEXT: Dispatch Width: 2 -# DEFAULT-NEXT: IPC: 0.97 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects +# CUSTOM: Iterations: 1 +# CUSTOM-NEXT: Instructions: 1 +# CUSTOM-NEXT: Total Cycles: 4 +# CUSTOM-NEXT: Dispatch Width: 2 +# CUSTOM-NEXT: IPC: 0.25 +# CUSTOM-NEXT: Block RThroughput: 0.5 + +# DEFAULT: Iterations: 100 +# DEFAULT-NEXT: Instructions: 100 +# DEFAULT-NEXT: Total Cycles: 103 +# DEFAULT-NEXT: Dispatch Width: 2 +# DEFAULT-NEXT: IPC: 0.97 +# DEFAULT-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.50 addl %eax, %eax +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 addl %eax, %eax Index: llvm/trunk/test/tools/llvm-mca/X86/dispatch_width.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/dispatch_width.s +++ llvm/trunk/test/tools/llvm-mca/X86/dispatch_width.s @@ -5,15 +5,15 @@ add %eax, %eax -# CUSTOM: Iterations: 100 -# CUSTOM-NEXT: Instructions: 100 -# CUSTOM-NEXT: Total Cycles: 103 -# CUSTOM-NEXT: Dispatch Width: 1 -# CUSTOM-NEXT: IPC: 0.97 - -# DEFAULT: Iterations: 100 -# DEFAULT-NEXT: Instructions: 100 -# DEFAULT-NEXT: Total Cycles: 103 -# DEFAULT-NEXT: Dispatch Width: 2 -# DEFAULT-NEXT: IPC: 0.97 +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 + +# CUSTOM-NEXT: Dispatch Width: 1 +# DEFAULT-NEXT: Dispatch Width: 2 + +# ALL-NEXT: IPC: 0.97 + +# CUSTOM-NEXT: Block RThroughput: 1.0 +# 
DEFAULT-NEXT: Block RThroughput: 0.5 Index: llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -10,65 +10,55 @@ vaddps %xmm0, %xmm0, %xmm1 vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 13 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.15 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 13 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.15 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 14 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.14 - -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 15 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.13 - -# BDWELL: Timeline view: -# BDWELL-NEXT: 012 -# BDWELL-NEXT: Index 0123456789 +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 -# SKYLAKE: Timeline view: -# SKYLAKE-NEXT: 012 -# SKYLAKE-NEXT: Index 0123456789 +# BDWELL-NEXT: Total Cycles: 13 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.15 + +# HASWELL-NEXT: Total Cycles: 14 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.14 + +# SKYLAKE-NEXT: Total Cycles: 13 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 0.15 + +# ZNVER1-NEXT: Total Cycles: 15 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.13 -# HASWELL: Timeline view: +# ALL-NEXT: Block RThroughput: 1.0 + +# ALL: Timeline view: + +# BDWELL-NEXT: 012 # HASWELL-NEXT: 0123 -# HASWELL-NEXT: Index 0123456789 +# SKYLAKE-NEXT: 012 +# ZNVER1-NEXT: 01234 -# ZNVER1: Timeline view: -# ZNVER1-NEXT: 01234 -# ZNVER1-NEXT: Index 0123456789 +# ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeeeER . . 
vaddps %xmm0, %xmm0, %xmm1 -# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 +# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 # HASWELL-NEXT: [0,1] DeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 -# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 +# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm1 # SKYLAKE-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# ALL: [0] [1] [2] [3] -# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# ALL-NEXT: 1. 1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 +# ALL-NEXT: 1. 
1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 Index: llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -10,65 +10,55 @@ vaddps %xmm0, %xmm0, %xmm2 vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 13 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.15 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 13 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.15 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 14 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.14 - -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 15 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.13 - -# BDWELL: Timeline view: -# BDWELL-NEXT: 012 -# BDWELL-NEXT: Index 0123456789 +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 -# SKYLAKE: Timeline view: -# SKYLAKE-NEXT: 012 -# SKYLAKE-NEXT: Index 0123456789 +# BDWELL-NEXT: Total Cycles: 13 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.15 + +# HASWELL-NEXT: Total Cycles: 14 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.14 + +# SKYLAKE-NEXT: Total Cycles: 13 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 0.15 + +# ZNVER1-NEXT: Total Cycles: 15 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.13 -# HASWELL: Timeline view: +# ALL-NEXT: Block RThroughput: 1.0 + +# ALL: Timeline view: + +# BDWELL-NEXT: 012 # HASWELL-NEXT: 0123 -# HASWELL-NEXT: Index 0123456789 +# SKYLAKE-NEXT: 012 +# ZNVER1-NEXT: 01234 -# ZNVER1: Timeline view: -# ZNVER1-NEXT: 01234 -# ZNVER1-NEXT: Index 0123456789 +# ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeeeER . . 
vaddps %xmm0, %xmm0, %xmm2 -# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 +# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 # HASWELL-NEXT: [0,1] DeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 -# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 +# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm2 # SKYLAKE-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# ALL: [0] [1] [2] [3] -# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# ALL-NEXT: 1. 1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 +# ALL-NEXT: 1. 
1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 Index: llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s +++ llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s @@ -9,29 +9,30 @@ imul esi, edi lea eax, [rsi + rdi] -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 400 -# ALL-NEXT: Total Cycles: 305 -# ALL-NEXT: Dispatch Width: 2 -# ALL-NEXT: IPC: 1.31 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 400 +# ALL-NEXT: Total Cycles: 305 +# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: IPC: 1.31 +# ALL-NEXT: Block RThroughput: 2.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# ATT-NEXT: 1 1 0.50 movl $1, %eax +# ATT-NEXT: 1 1 0.50 movl $255, %ebx +# ATT-NEXT: 2 3 1.00 imull %edi, %esi +# ATT-NEXT: 1 1 0.50 leal (%rsi,%rdi), %eax -# INTEL: [1] [2] [3] [4] [5] [6] Instructions: # INTEL-NEXT: 1 1 0.50 mov eax, 1 # INTEL-NEXT: 1 1 0.50 mov ebx, 255 # INTEL-NEXT: 2 3 1.00 imul esi, edi # INTEL-NEXT: 1 1 0.50 lea eax, [rsi + rdi] -# ATT: [1] [2] [3] [4] [5] [6] Instructions: -# ATT-NEXT: 1 1 0.50 movl $1, %eax -# ATT-NEXT: 1 1 0.50 movl $255, %ebx -# ATT-NEXT: 2 3 1.00 imull %edi, %esi -# ATT-NEXT: 1 1 0.50 leal (%rsi,%rdi), %eax - Index: llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-2.s @@ -7,11 +7,12 @@ # CHECK: [0] Code Region - Default -# CHECK: Iterations: 1 -# CHECK-NEXT: 
Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-3.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-3.s +++ llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-3.s @@ -7,11 +7,12 @@ # CHECK: [0] Code Region - foo -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-4.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-4.s +++ llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-4.s @@ -11,11 +11,12 @@ # CHECK: [0] Code Region - NotEmpty -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-5.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-5.s +++ llvm/trunk/test/tools/llvm-mca/X86/llvm-mca-markers-5.s @@ -15,11 +15,12 @@ # CHECK: [0] Code Region - 
First Region -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -34,11 +35,12 @@ # CHECK: [1] Code Region - Second Region -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -53,11 +55,12 @@ # CHECK: [2] Code Region - Third Region -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 1 -# CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.25 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 1 +# CHECK-NEXT: Total Cycles: 4 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-1.s @@ -6,22 +6,23 @@ add %eax, %eax -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 100 -# ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 -# ALL-NEXT: IPC: 0.97 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Dispatch 
Width: 2 +# ALL-NEXT: IPC: 0.97 +# ALL-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.50 addl %eax, %eax +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 addl %eax, %eax # FULLREPORT: Dynamic Dispatch Stall Cycles: # FULLREPORT-NEXT: RAT - Register unavailable: 0 Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/option-all-stats-2.s @@ -7,22 +7,23 @@ add %eax, %eax -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 100 -# ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 -# ALL-NEXT: IPC: 0.97 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: IPC: 0.97 +# ALL-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.50 addl %eax, %eax +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 addl %eax, %eax # FULL: Dynamic Dispatch Stall Cycles: # FULL-NEXT: RAT - Register unavailable: 0 Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s +++ 
llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s @@ -6,11 +6,12 @@ add %eax, %eax -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 100 -# ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 -# ALL-NEXT: IPC: 0.97 +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: IPC: 0.97 +# ALL-NEXT: Block RThroughput: 0.5 # DEFAULTREPORT: Instruction Info: # DEFAULTREPORT-NEXT: [1]: #uOps @@ -23,94 +24,94 @@ # DEFAULTREPORT: [1] [2] [3] [4] [5] [6] Instructions: # DEFAULTREPORT-NEXT: 1 1 0.50 addl %eax, %eax -# FULLREPORT: Dynamic Dispatch Stall Cycles: -# FULLREPORT-NEXT: RAT - Register unavailable: 0 -# FULLREPORT-NEXT: RCU - Retire tokens unavailable: 0 -# FULLREPORT-NEXT: SCHEDQ - Scheduler full: 61 -# FULLREPORT-NEXT: LQ - Load queue full: 0 -# FULLREPORT-NEXT: SQ - Store queue full: 0 -# FULLREPORT-NEXT: GROUP - Static restrictions on the dispatch group: 0 - -# FULLREPORT: Dispatch Logic - number of cycles where we saw N instructions dispatched: -# FULLREPORT-NEXT: [# dispatched], [# cycles] -# FULLREPORT-NEXT: 0, 22 (21.4%) -# FULLREPORT-NEXT: 2, 19 (18.4%) -# FULLREPORT-NEXT: 1, 62 (60.2%) - -# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued: -# FULLREPORT-NEXT: [# issued], [# cycles] -# FULLREPORT-NEXT: 0, 3 (2.9%) -# FULLREPORT-NEXT: 1, 100 (97.1%) - -# FULLREPORT: Scheduler's queue usage: -# FULLREPORT-NEXT: JALU01, 20/20 -# FULLREPORT-NEXT: JFPU01, 0/18 -# FULLREPORT-NEXT: JLSAGU, 0/12 - -# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired: -# FULLREPORT-NEXT: [# retired], [# cycles] -# FULLREPORT-NEXT: 0, 3 (2.9%) -# FULLREPORT-NEXT: 1, 100 (97.1%) - -# FULLREPORT: Register File statistics: -# FULLREPORT-NEXT: Total number of mappings created: 200 -# FULLREPORT-NEXT: Max number of mappings used: 44 - -# FULLREPORT: * Register File #1 -- JFpuPRF: -# FULLREPORT-NEXT: Number of physical registers: 72 -# 
FULLREPORT-NEXT: Total number of mappings created: 0 -# FULLREPORT-NEXT: Max number of mappings used: 0 - -# FULLREPORT: * Register File #2 -- JIntegerPRF: -# FULLREPORT-NEXT: Number of physical registers: 64 -# FULLREPORT-NEXT: Total number of mappings created: 200 -# FULLREPORT-NEXT: Max number of mappings used: 44 - -# FULLREPORT: Resources: -# FULLREPORT-NEXT: [0] - JALU0 -# FULLREPORT-NEXT: [1] - JALU1 -# FULLREPORT-NEXT: [2] - JDiv -# FULLREPORT-NEXT: [3] - JFPA -# FULLREPORT-NEXT: [4] - JFPM -# FULLREPORT-NEXT: [5] - JFPU0 -# FULLREPORT-NEXT: [6] - JFPU1 -# FULLREPORT-NEXT: [7] - JLAGU -# FULLREPORT-NEXT: [8] - JMul -# FULLREPORT-NEXT: [9] - JSAGU -# FULLREPORT-NEXT: [10] - JSTC -# FULLREPORT-NEXT: [11] - JVALU0 -# FULLREPORT-NEXT: [12] - JVALU1 -# FULLREPORT-NEXT: [13] - JVIMUL - -# FULLREPORT: Resource pressure per iteration: -# FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - - -# FULLREPORT: Resource pressure by instruction: -# FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %eax, %eax - -# FULLREPORT: Timeline view: -# FULLREPORT-NEXT: 012 -# FULLREPORT-NEXT: Index 0123456789 - -# FULLREPORT: [0,0] DeER . . . addl %eax, %eax -# FULLREPORT-NEXT: [1,0] D=eER. . . addl %eax, %eax -# FULLREPORT-NEXT: [2,0] .D=eER . . addl %eax, %eax -# FULLREPORT-NEXT: [3,0] .D==eER . . addl %eax, %eax -# FULLREPORT-NEXT: [4,0] . D==eER . . addl %eax, %eax -# FULLREPORT-NEXT: [5,0] . D===eER . . addl %eax, %eax -# FULLREPORT-NEXT: [6,0] . D===eER. . addl %eax, %eax -# FULLREPORT-NEXT: [7,0] . D====eER . addl %eax, %eax -# FULLREPORT-NEXT: [8,0] . D====eER. addl %eax, %eax -# FULLREPORT-NEXT: [9,0] . 
D=====eER addl %eax, %eax - -# FULLREPORT: Average Wait times (based on the timeline view): -# FULLREPORT-NEXT: [0]: Executions -# FULLREPORT-NEXT: [1]: Average time spent waiting in a scheduler's queue -# FULLREPORT-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# FULLREPORT-NEXT: [3]: Average time elapsed from WB until retire stage +# FULLREPORT: Dynamic Dispatch Stall Cycles: +# FULLREPORT-NEXT: RAT - Register unavailable: 0 +# FULLREPORT-NEXT: RCU - Retire tokens unavailable: 0 +# FULLREPORT-NEXT: SCHEDQ - Scheduler full: 61 +# FULLREPORT-NEXT: LQ - Load queue full: 0 +# FULLREPORT-NEXT: SQ - Store queue full: 0 +# FULLREPORT-NEXT: GROUP - Static restrictions on the dispatch group: 0 + +# FULLREPORT: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# FULLREPORT-NEXT: [# dispatched], [# cycles] +# FULLREPORT-NEXT: 0, 22 (21.4%) +# FULLREPORT-NEXT: 2, 19 (18.4%) +# FULLREPORT-NEXT: 1, 62 (60.2%) + +# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued: +# FULLREPORT-NEXT: [# issued], [# cycles] +# FULLREPORT-NEXT: 0, 3 (2.9%) +# FULLREPORT-NEXT: 1, 100 (97.1%) + +# FULLREPORT: Scheduler's queue usage: +# FULLREPORT-NEXT: JALU01, 20/20 +# FULLREPORT-NEXT: JFPU01, 0/18 +# FULLREPORT-NEXT: JLSAGU, 0/12 + +# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired: +# FULLREPORT-NEXT: [# retired], [# cycles] +# FULLREPORT-NEXT: 0, 3 (2.9%) +# FULLREPORT-NEXT: 1, 100 (97.1%) + +# FULLREPORT: Register File statistics: +# FULLREPORT-NEXT: Total number of mappings created: 200 +# FULLREPORT-NEXT: Max number of mappings used: 44 + +# FULLREPORT: * Register File #1 -- JFpuPRF: +# FULLREPORT-NEXT: Number of physical registers: 72 +# FULLREPORT-NEXT: Total number of mappings created: 0 +# FULLREPORT-NEXT: Max number of mappings used: 0 + +# FULLREPORT: * Register File #2 -- JIntegerPRF: +# FULLREPORT-NEXT: Number of physical registers: 64 +# FULLREPORT-NEXT: Total 
number of mappings created: 200 +# FULLREPORT-NEXT: Max number of mappings used: 44 + +# FULLREPORT: Resources: +# FULLREPORT-NEXT: [0] - JALU0 +# FULLREPORT-NEXT: [1] - JALU1 +# FULLREPORT-NEXT: [2] - JDiv +# FULLREPORT-NEXT: [3] - JFPA +# FULLREPORT-NEXT: [4] - JFPM +# FULLREPORT-NEXT: [5] - JFPU0 +# FULLREPORT-NEXT: [6] - JFPU1 +# FULLREPORT-NEXT: [7] - JLAGU +# FULLREPORT-NEXT: [8] - JMul +# FULLREPORT-NEXT: [9] - JSAGU +# FULLREPORT-NEXT: [10] - JSTC +# FULLREPORT-NEXT: [11] - JVALU0 +# FULLREPORT-NEXT: [12] - JVALU1 +# FULLREPORT-NEXT: [13] - JVIMUL + +# FULLREPORT: Resource pressure per iteration: +# FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - + +# FULLREPORT: Resource pressure by instruction: +# FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %eax, %eax + +# FULLREPORT: Timeline view: +# FULLREPORT-NEXT: 012 +# FULLREPORT-NEXT: Index 0123456789 + +# FULLREPORT: [0,0] DeER . . . addl %eax, %eax +# FULLREPORT-NEXT: [1,0] D=eER. . . addl %eax, %eax +# FULLREPORT-NEXT: [2,0] .D=eER . . addl %eax, %eax +# FULLREPORT-NEXT: [3,0] .D==eER . . addl %eax, %eax +# FULLREPORT-NEXT: [4,0] . D==eER . . addl %eax, %eax +# FULLREPORT-NEXT: [5,0] . D===eER . . addl %eax, %eax +# FULLREPORT-NEXT: [6,0] . D===eER. . addl %eax, %eax +# FULLREPORT-NEXT: [7,0] . D====eER . addl %eax, %eax +# FULLREPORT-NEXT: [8,0] . D====eER. addl %eax, %eax +# FULLREPORT-NEXT: [9,0] . D=====eER addl %eax, %eax + +# FULLREPORT: Average Wait times (based on the timeline view): +# FULLREPORT-NEXT: [0]: Executions +# FULLREPORT-NEXT: [1]: Average time spent waiting in a scheduler's queue +# FULLREPORT-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# FULLREPORT-NEXT: [3]: Average time elapsed from WB until retire stage -# FULLREPORT: [0] [1] [2] [3] -# FULLREPORT-NEXT: 0. 
10 3.5 0.1 0.0 addl %eax, %eax +# FULLREPORT: [0] [1] [2] [3] +# FULLREPORT-NEXT: 0. 10 3.5 0.1 0.0 addl %eax, %eax Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s @@ -7,65 +7,66 @@ add %eax, %eax -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 100 -# ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 -# ALL-NEXT: IPC: 0.97 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.50 addl %eax, %eax - -# ALL: Dynamic Dispatch Stall Cycles: -# ALL-NEXT: RAT - Register unavailable: 0 -# ALL-NEXT: RCU - Retire tokens unavailable: 0 -# ALL-NEXT: SCHEDQ - Scheduler full: 61 -# ALL-NEXT: LQ - Load queue full: 0 -# ALL-NEXT: SQ - Store queue full: 0 -# ALL-NEXT: GROUP - Static restrictions on the dispatch group: 0 - -# ALL: Dispatch Logic - number of cycles where we saw N instructions dispatched: -# ALL-NEXT: [# dispatched], [# cycles] -# ALL-NEXT: 0, 22 (21.4%) -# ALL-NEXT: 2, 19 (18.4%) -# ALL-NEXT: 1, 62 (60.2%) - -# ALL: Schedulers - number of cycles where we saw N instructions issued: -# ALL-NEXT: [# issued], [# cycles] -# ALL-NEXT: 0, 3 (2.9%) -# ALL-NEXT: 1, 100 (97.1%) - -# ALL: Scheduler's queue usage: -# ALL-NEXT: JALU01, 20/20 -# ALL-NEXT: JFPU01, 0/18 -# ALL-NEXT: JLSAGU, 0/12 - -# ALL: Retire Control Unit - number of cycles where we saw N instructions retired: -# ALL-NEXT: [# retired], [# cycles] -# ALL-NEXT: 0, 3 (2.9%) -# ALL-NEXT: 1, 100 (97.1%) - -# ALL: Register File statistics: -# ALL-NEXT: Total number of mappings created: 200 -# ALL-NEXT: Max number of mappings used: 44 - -# ALL: * Register File #1 -- JFpuPRF: -# ALL-NEXT: Number of physical 
registers: 72 -# ALL-NEXT: Total number of mappings created: 0 -# ALL-NEXT: Max number of mappings used: 0 - -# ALL: * Register File #2 -- JIntegerPRF: -# ALL-NEXT: Number of physical registers: 64 -# ALL-NEXT: Total number of mappings created: 200 -# ALL-NEXT: Max number of mappings used: 44 +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 100 +# ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: IPC: 0.97 +# ALL-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 addl %eax, %eax + +# ALL: Dynamic Dispatch Stall Cycles: +# ALL-NEXT: RAT - Register unavailable: 0 +# ALL-NEXT: RCU - Retire tokens unavailable: 0 +# ALL-NEXT: SCHEDQ - Scheduler full: 61 +# ALL-NEXT: LQ - Load queue full: 0 +# ALL-NEXT: SQ - Store queue full: 0 +# ALL-NEXT: GROUP - Static restrictions on the dispatch group: 0 + +# ALL: Dispatch Logic - number of cycles where we saw N instructions dispatched: +# ALL-NEXT: [# dispatched], [# cycles] +# ALL-NEXT: 0, 22 (21.4%) +# ALL-NEXT: 2, 19 (18.4%) +# ALL-NEXT: 1, 62 (60.2%) + +# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL-NEXT: [# issued], [# cycles] +# ALL-NEXT: 0, 3 (2.9%) +# ALL-NEXT: 1, 100 (97.1%) + +# ALL: Scheduler's queue usage: +# ALL-NEXT: JALU01, 20/20 +# ALL-NEXT: JFPU01, 0/18 +# ALL-NEXT: JLSAGU, 0/12 + +# ALL: Retire Control Unit - number of cycles where we saw N instructions retired: +# ALL-NEXT: [# retired], [# cycles] +# ALL-NEXT: 0, 3 (2.9%) +# ALL-NEXT: 1, 100 (97.1%) + +# ALL: Register File statistics: +# ALL-NEXT: Total number of mappings created: 200 +# ALL-NEXT: Max number of mappings used: 44 + +# ALL: * Register File #1 -- JFpuPRF: +# ALL-NEXT: Number of physical registers: 72 +# ALL-NEXT: Total number of mappings created: 0 +# 
ALL-NEXT: Max number of mappings used: 0 + +# ALL: * Register File #2 -- JIntegerPRF: +# ALL-NEXT: Number of physical registers: 64 +# ALL-NEXT: Total number of mappings created: 200 +# ALL-NEXT: Max number of mappings used: 44 # FULLREPORT: Resources: # FULLREPORT-NEXT: [0] - JALU0 @@ -83,37 +84,37 @@ # FULLREPORT-NEXT: [12] - JVALU1 # FULLREPORT-NEXT: [13] - JVIMUL -# NORPV: Timeline view: -# NORPV-NEXT: 012 -# NORPV-NEXT: Index 0123456789 +# NORPV: Timeline view: +# NORPV-NEXT: 012 +# NORPV-NEXT: Index 0123456789 # FULLREPORT: Resource pressure per iteration: # FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] # FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - -# NORPV: [0,0] DeER . . . addl %eax, %eax -# NORPV-NEXT: [1,0] D=eER. . . addl %eax, %eax -# NORPV-NEXT: [2,0] .D=eER . . addl %eax, %eax -# NORPV-NEXT: [3,0] .D==eER . . addl %eax, %eax -# NORPV-NEXT: [4,0] . D==eER . . addl %eax, %eax -# NORPV-NEXT: [5,0] . D===eER . . addl %eax, %eax -# NORPV-NEXT: [6,0] . D===eER. . addl %eax, %eax -# NORPV-NEXT: [7,0] . D====eER . addl %eax, %eax -# NORPV-NEXT: [8,0] . D====eER. addl %eax, %eax -# NORPV-NEXT: [9,0] . D=====eER addl %eax, %eax - -# NORPV: Average Wait times (based on the timeline view): -# NORPV-NEXT: [0]: Executions -# NORPV-NEXT: [1]: Average time spent waiting in a scheduler's queue -# NORPV-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# NORPV-NEXT: [3]: Average time elapsed from WB until retire stage +# NORPV: [0,0] DeER . . . addl %eax, %eax +# NORPV-NEXT: [1,0] D=eER. . . addl %eax, %eax +# NORPV-NEXT: [2,0] .D=eER . . addl %eax, %eax +# NORPV-NEXT: [3,0] .D==eER . . addl %eax, %eax +# NORPV-NEXT: [4,0] . D==eER . . addl %eax, %eax +# NORPV-NEXT: [5,0] . D===eER . . addl %eax, %eax +# NORPV-NEXT: [6,0] . D===eER. . addl %eax, %eax +# NORPV-NEXT: [7,0] . D====eER . addl %eax, %eax +# NORPV-NEXT: [8,0] . D====eER. addl %eax, %eax +# NORPV-NEXT: [9,0] . 
D=====eER addl %eax, %eax + +# NORPV: Average Wait times (based on the timeline view): +# NORPV-NEXT: [0]: Executions +# NORPV-NEXT: [1]: Average time spent waiting in a scheduler's queue +# NORPV-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# NORPV-NEXT: [3]: Average time elapsed from WB until retire stage # FULLREPORT: Resource pressure by instruction: # FULLREPORT-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # FULLREPORT-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %eax, %eax -# NORPV: [0] [1] [2] [3] -# NORPV-NEXT: 0. 10 3.5 0.1 0.0 addl %eax, %eax +# NORPV: [0] [1] [2] [3] +# NORPV-NEXT: 0. 10 3.5 0.1 0.0 addl %eax, %eax # FULLREPORT: Timeline view: # FULLREPORT-NEXT: 012 Index: llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -16,127 +16,106 @@ vaddps %xmm0, %xmm0, %xmm1 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 - -# BTVER2: Iterations: 1 -# BTVER2-NEXT: Instructions: 2 -# BTVER2-NEXT: Total Cycles: 11 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.18 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 11 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.18 - -# IVY: Iterations: 1 -# IVY-NEXT: Instructions: 2 -# IVY-NEXT: Total Cycles: 11 -# IVY-NEXT: Dispatch Width: 4 -# IVY-NEXT: IPC: 0.18 - -# SANDY: Iterations: 1 -# SANDY-NEXT: Instructions: 2 -# SANDY-NEXT: Total Cycles: 11 -# SANDY-NEXT: Dispatch Width: 4 -# SANDY-NEXT: IPC: 0.18 - -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 11 -# ZNVER1-NEXT: Dispatch Width: 4 -# 
ZNVER1-NEXT: IPC: 0.18 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 11 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.18 - -# BTVER2: Timeline view: -# BTVER2-NEXT: 0 -# BTVER2-NEXT: Index 0123456789 +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 + +# BDWELL-NEXT: Total Cycles: 10 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + +# BTVER2-NEXT: Total Cycles: 11 +# BTVER2-NEXT: Dispatch Width: 2 +# BTVER2-NEXT: IPC: 0.18 +# BTVER2-NEXT: Block RThroughput: 2.0 + +# HASWELL-NEXT: Total Cycles: 11 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block RThroughput: 2.0 + +# IVY-NEXT: Total Cycles: 11 +# IVY-NEXT: Dispatch Width: 4 +# IVY-NEXT: IPC: 0.18 +# IVY-NEXT: Block RThroughput: 1.0 + +# SANDY-NEXT: Total Cycles: 11 +# SANDY-NEXT: Dispatch Width: 4 +# SANDY-NEXT: IPC: 0.18 +# SANDY-NEXT: Block RThroughput: 1.0 + +# SKYLAKE-NEXT: Total Cycles: 11 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 0.18 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + +# ZNVER1-NEXT: Total Cycles: 11 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.18 +# ZNVER1-NEXT: Block RThroughput: 1.0 + +# BTVER2: Timeline view: +# BTVER2-NEXT: 0 +# BTVER2-NEXT: Index 0123456789 # HASWELL: Timeline view: # HASWELL-NEXT: 0 # HASWELL-NEXT: Index 0123456789 -# IVY: Timeline view: -# IVY-NEXT: 0 -# IVY-NEXT: Index 0123456789 - -# SANDY: Timeline view: -# SANDY-NEXT: 0 -# SANDY-NEXT: Index 0123456789 +# IVY: Timeline view: +# IVY-NEXT: 0 +# IVY-NEXT: Index 0123456789 + +# SANDY: Timeline view: +# SANDY-NEXT: 0 +# SANDY-NEXT: Index 0123456789 # SKYLAKE: Timeline view: # SKYLAKE-NEXT: 0 # SKYLAKE-NEXT: Index 0123456789 -# ZNVER1: Timeline view: -# ZNVER1-NEXT: 0 -# ZNVER1-NEXT: Index 0123456789 +# ZNVER1: Timeline view: +# ZNVER1-NEXT: 0 +# ZNVER1-NEXT: Index 0123456789 -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 0123456789 +# BDWELL: Timeline view: 
+# BDWELL-NEXT: Index 0123456789 -# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm1 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# BDWELL: [0] [1] [2] [3] -# BDWELL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# BDWELL-NEXT: 1. 
1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# HASWELL: [0] [1] [2] [3] -# HASWELL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage -# IVY: [0] [1] [2] [3] -# IVY-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# SANDY: [0] [1] [2] [3] -# SANDY-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# SKYLAKE: [0] [1] [2] [3] -# SKYLAKE-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 +# BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SKYLAKE-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# ZNVER1: [0] [1] [2] [3] -# ZNVER1-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# BTVER2: [0] [1] [2] [3] -# BTVER2-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 -# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1-NEXT: 1. 
1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 Index: llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -16,127 +16,106 @@ vaddps %xmm0, %xmm0, %xmm2 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# BDWELL: Iterations: 1 -# BDWELL-NEXT: Instructions: 2 -# BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 - -# BTVER2: Iterations: 1 -# BTVER2-NEXT: Instructions: 2 -# BTVER2-NEXT: Total Cycles: 11 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.18 - -# HASWELL: Iterations: 1 -# HASWELL-NEXT: Instructions: 2 -# HASWELL-NEXT: Total Cycles: 11 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.18 - -# IVY: Iterations: 1 -# IVY-NEXT: Instructions: 2 -# IVY-NEXT: Total Cycles: 11 -# IVY-NEXT: Dispatch Width: 4 -# IVY-NEXT: IPC: 0.18 - -# SANDY: Iterations: 1 -# SANDY-NEXT: Instructions: 2 -# SANDY-NEXT: Total Cycles: 11 -# SANDY-NEXT: Dispatch Width: 4 -# SANDY-NEXT: IPC: 0.18 - -# ZNVER1: Iterations: 1 -# ZNVER1-NEXT: Instructions: 2 -# ZNVER1-NEXT: Total Cycles: 11 -# ZNVER1-NEXT: Dispatch Width: 4 -# ZNVER1-NEXT: IPC: 0.18 - -# SKYLAKE: Iterations: 1 -# SKYLAKE-NEXT: Instructions: 2 -# SKYLAKE-NEXT: Total Cycles: 11 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.18 - -# BTVER2: Timeline view: -# BTVER2-NEXT: 0 -# BTVER2-NEXT: Index 0123456789 +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 2 + +# BDWELL-NEXT: Total Cycles: 10 +# BDWELL-NEXT: Dispatch Width: 4 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + +# BTVER2-NEXT: Total Cycles: 11 +# BTVER2-NEXT: Dispatch Width: 2 +# BTVER2-NEXT: IPC: 0.18 +# BTVER2-NEXT: Block RThroughput: 2.0 + +# HASWELL-NEXT: Total Cycles: 11 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block 
RThroughput: 2.0 + +# IVY-NEXT: Total Cycles: 11 +# IVY-NEXT: Dispatch Width: 4 +# IVY-NEXT: IPC: 0.18 +# IVY-NEXT: Block RThroughput: 1.0 + +# SANDY-NEXT: Total Cycles: 11 +# SANDY-NEXT: Dispatch Width: 4 +# SANDY-NEXT: IPC: 0.18 +# SANDY-NEXT: Block RThroughput: 1.0 + +# SKYLAKE-NEXT: Total Cycles: 11 +# SKYLAKE-NEXT: Dispatch Width: 6 +# SKYLAKE-NEXT: IPC: 0.18 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + +# ZNVER1-NEXT: Total Cycles: 11 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.18 +# ZNVER1-NEXT: Block RThroughput: 1.0 + +# BTVER2: Timeline view: +# BTVER2-NEXT: 0 +# BTVER2-NEXT: Index 0123456789 # HASWELL: Timeline view: # HASWELL-NEXT: 0 # HASWELL-NEXT: Index 0123456789 -# IVY: Timeline view: -# IVY-NEXT: 0 -# IVY-NEXT: Index 0123456789 - -# SANDY: Timeline view: -# SANDY-NEXT: 0 -# SANDY-NEXT: Index 0123456789 +# IVY: Timeline view: +# IVY-NEXT: 0 +# IVY-NEXT: Index 0123456789 + +# SANDY: Timeline view: +# SANDY-NEXT: 0 +# SANDY-NEXT: Index 0123456789 # SKYLAKE: Timeline view: # SKYLAKE-NEXT: 0 # SKYLAKE-NEXT: Index 0123456789 -# ZNVER1: Timeline view: -# ZNVER1-NEXT: 0 -# ZNVER1-NEXT: Index 0123456789 +# ZNVER1: Timeline view: +# ZNVER1-NEXT: 0 +# ZNVER1-NEXT: Index 0123456789 -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 0123456789 +# BDWELL: Timeline view: +# BDWELL-NEXT: Index 0123456789 -# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 # HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# SANDY: [0,0] DeeeER . 
vaddps %xmm0, %xmm0, %xmm2 -# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm2 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ALL: Average Wait times (based on the timeline view): -# ALL-NEXT: [0]: Executions -# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue -# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready -# ALL-NEXT: [3]: Average time elapsed from WB until retire stage - -# BDWELL: [0] [1] [2] [3] -# BDWELL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# HASWELL: [0] [1] [2] [3] -# HASWELL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage -# IVY: [0] [1] [2] [3] -# IVY-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# SANDY: [0] [1] [2] [3] -# SANDY-NEXT: 0. 
1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# SKYLAKE: [0] [1] [2] [3] -# SKYLAKE-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 +# BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SKYLAKE-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# ZNVER1: [0] [1] [2] [3] -# ZNVER1-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# BTVER2: [0] [1] [2] [3] -# BTVER2-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 -# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 Index: llvm/trunk/utils/update_mca_test_checks.py =================================================================== --- llvm/trunk/utils/update_mca_test_checks.py +++ llvm/trunk/utils/update_mca_test_checks.py @@ -23,7 +23,7 @@ class Error(Exception): - """ Generic Error to be raised without printing a traceback. + """ Generic Error that can be raised without printing a traceback. """ pass @@ -137,7 +137,89 @@ return run_infos -def _get_block_infos(run_infos, test_path, args): # noqa +def _break_down_block(block_info, common_prefix): + """ Given a block_info, see if we can analyze it further to let us break it + down by prefix per-line rather than per-block. + """ + texts = block_info.keys() + prefixes = list(block_info.values()) + # Split the lines from each of the incoming block_texts and zip them so that + # each element contains the corresponding lines from each text. E.g. 
+ # + # block_text_1: A # line 1 + # B # line 2 + # + # block_text_2: A # line 1 + # C # line 2 + # + # would become: + # + # [(A, A), # line 1 + # (B, C)] # line 2 + # + line_tuples = list(zip(*list((text.splitlines() for text in texts)))) + + # To simplify output, we'll only proceed if the very first line of the block + # texts is common to each of them. + if len(set(line_tuples[0])) != 1: + return [] + + result = [] + lresult = defaultdict(list) + for i, line in enumerate(line_tuples): + if len(set(line)) == 1: + # We're about to output a line with the common prefix. This is a sync + # point so flush any batched-up lines one prefix at a time to the output + # first. + for prefix in sorted(lresult): + result.extend(lresult[prefix]) + lresult = defaultdict(list) + + # The line is common to each block so output with the common prefix. + result.append((common_prefix, line[0])) + else: + # The line is not common to each block, or we don't have a common prefix. + # If there are no prefixes available, warn and bail out. + if not prefixes[0]: + _warn('multiple lines not disambiguated by prefixes:\n{}\n' + 'Some blocks may be skipped entirely as a result.'.format( + '\n'.join(' - {}'.format(l) for l in line))) + return [] + + # Iterate through the line from each of the blocks and add the line with + # the corresponding prefix to the current batch of results so that we can + # later output them per-prefix. + for i, l in enumerate(line): + for prefix in prefixes[i]: + lresult[prefix].append((prefix, l)) + + # Flush any remaining batched-up lines one prefix at a time to the output. + for prefix in sorted(lresult): + result.extend(lresult[prefix]) + return result + + +def _get_useful_prefix_info(run_infos): + """ Given the run_infos, calculate any prefixes that are common to every one, + and the length of the longest prefix string. 
+ """ + try: + all_sets = [set(s) for s in list(zip(*run_infos))[0]] + common_to_all = set.intersection(*all_sets) + longest_prefix_len = max(len(p) for p in set.union(*all_sets)) + except IndexError: + common_to_all = [] + longest_prefix_len = 0 + else: + if len(common_to_all) > 1: + _warn('Multiple prefixes common to all RUN lines: {}'.format( + common_to_all)) + if common_to_all: + common_to_all = sorted(common_to_all)[0] + return common_to_all, longest_prefix_len + + +def _get_block_infos(run_infos, test_path, args, common_prefix): # noqa """ For each run line, run the tool with the specified args and collect the output. We use the concept of 'blocks' for uniquing, where a block is a series of lines of text with no more than one newline character between @@ -202,7 +284,6 @@ # Now go through the block_infos structure and attempt to smartly prune the # number of prefixes per block to the minimal set possible to output. for block_num in range(len(block_infos)): - # When there are multiple block texts for a block num, remove any # prefixes that are common to more than one of them. # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ] @@ -220,9 +301,7 @@ # When a block text matches multiple sets of prefixes, try removing any # prefixes that aren't common to all of them. # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}] - common_values = pruned_sets[i][0].copy() - for s in pruned_sets[i][1:]: - common_values &= s + common_values = set.intersection(*pruned_sets[i]) if common_values: pruned_sets[i] = [common_values] @@ -241,11 +320,60 @@ block_infos[block_num][block_text] = sorted(list(current_set)) + # If we have multiple block_texts, try to break them down further to avoid + # the case where we have very similar block_texts repeated after each + # other. + if common_prefix and len(block_infos[block_num]) > 1: + # We'll only attempt this if each of the block_texts have the same number + # of lines as each other. 
+ same_num_Lines = (len(set(len(k.splitlines()) + for k in block_infos[block_num].keys())) == 1) + if same_num_Lines: + breakdown = _break_down_block(block_infos[block_num], common_prefix) + if breakdown: + block_infos[block_num] = breakdown + return block_infos +def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad): + """ Write an individual block, with correct padding on the prefixes. + """ + end_prefix = ': ' + previous_prefix = None + num_lines_of_prefix = 0 + + for prefix, line in block: + if prefix in not_prefix_set: + _warn('not writing for prefix {0} due to presence of "{0}-NOT:" ' + 'in input file.'.format(prefix)) + continue + + # If the previous line isn't already blank and we're writing more than one + # line for the current prefix output a blank line first, unless either the + # current or previous prefix is common to all. + num_lines_of_prefix += 1 + if prefix != previous_prefix: + if output and output[-1]: + if num_lines_of_prefix > 1 or any(p == common_prefix + for p in (prefix, previous_prefix)): + output.append('') + num_lines_of_prefix = 0 + previous_prefix = prefix + + output.append( + '{} {}{}{} {}'.format(COMMENT_CHAR, + prefix, + end_prefix, + ' ' * (prefix_pad - len(prefix)), + line).rstrip()) + end_prefix = '-NEXT:' + + output.append('') + + def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa - args): + args, common_prefix, prefix_pad): prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) not_prefix_set = set() @@ -290,27 +418,31 @@ if not block_text: continue - if block_infos[block_num][block_text]: + if type(block_infos[block_num]) is list: + # The block is of the type output from _break_down_block(). + _write_block(output_check_lines, + block_infos[block_num], + not_prefix_set, + common_prefix, + prefix_pad) + break + elif block_infos[block_num][block_text]: + # _break_down_block() was unable to do anything so output the block + # as-is.
lines = block_text.split('\n') for prefix in block_infos[block_num][block_text]: - if prefix in not_prefix_set: - _warn('not writing for prefix {0} due to presence of "{0}-NOT:" ' - 'in input file.'.format(prefix)) - continue - - output_check_lines.append( - '{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0]).rstrip()) - for line in lines[1:]: - output_check_lines.append( - '{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line).rstrip()) - output_check_lines.append('') + _write_block(output_check_lines, + [(prefix, line) for line in lines], + not_prefix_set, + common_prefix, + prefix_pad) if output_check_lines: output_lines.insert(0, ADVERT) output_lines.extend(output_check_lines) if input_lines == output_lines: - sys.stderr.write(' [unchanged]\n') + sys.stderr.write(' [unchanged]\n') return sys.stderr.write(' [{} lines total]\n'.format(len(output_lines))) @@ -346,8 +478,15 @@ run_lines = _find_run_lines(input_lines, args) run_infos = _get_run_infos(run_lines, args) - block_infos = _get_block_infos(run_infos, test_path, args) - _write_output(test_path, input_lines, run_infos, block_infos, args) + common_prefix, prefix_pad = _get_useful_prefix_info(run_infos) + block_infos = _get_block_infos(run_infos, test_path, args, common_prefix) + _write_output(test_path, + input_lines, + run_infos, + block_infos, + args, + common_prefix, + prefix_pad) return 0