Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +imul %rax, %rbx +lzcnt %ax, %bx +add %ecx, %ebx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 4 1.00 imulq %rax, %rbx +# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx +# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeeER. imulq %rax, %rbx +# CHECK-NEXT: [0,1] DeeE--R. lzcntw %ax, %bx +# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx +# CHECK-NEXT: 1. 1 1.0 1.0 2.0 lzcntw %ax, %bx +# CHECK-NEXT: 2. 
1 5.0 0.0 0.0 addl %ecx, %ebx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=6 < %s | FileCheck %s + +# NOTE(review): a false dependency on %dx would keep the mov from executing in +# parallel with the add, but in the timeline below the mov never waits for it. + +add %cx, %dx +mov %ax, %dx +xor %bx, %dx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 1129 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 3.99 +# CHECK-NEXT: Block RThroughput: 0.8 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 addw %cx, %dx +# CHECK-NEXT: 1 1 0.25 movw %ax, %dx +# CHECK-NEXT: 1 1 0.25 xorw %bx, %dx + +# CHECK: Resources: +# CHECK-NEXT: [0] - ZnAGU0 +# CHECK-NEXT: [1] - ZnAGU1 +# CHECK-NEXT: [2] - ZnALU0 +# CHECK-NEXT: [3] - ZnALU1 +# CHECK-NEXT: [4] - ZnALU2 +# CHECK-NEXT: [5] - ZnALU3 +# CHECK-NEXT: [6] - ZnDivider +# CHECK-NEXT: [7] - ZnFPU0 +# CHECK-NEXT: [8] - ZnFPU1 +# CHECK-NEXT: [9] - ZnFPU2 +# CHECK-NEXT: [10] - ZnFPU3 +# CHECK-NEXT: [11] - ZnMultiplier + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: - - 0.75 0.75 0.75 0.75 - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: +# CHECK-NEXT: - - - 0.25 0.75 - - - - - - - addw %cx, %dx +# CHECK-NEXT: - - 
0.25 - - 0.75 - - - - - - movw %ax, %dx +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - xorw %bx, %dx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeER . . addw %cx, %dx +# CHECK-NEXT: [0,1] DeER . . movw %ax, %dx +# CHECK-NEXT: [0,2] D=eER. . xorw %bx, %dx +# CHECK-NEXT: [1,0] D==eER . addw %cx, %dx +# CHECK-NEXT: [1,1] .DeE-R . movw %ax, %dx +# CHECK-NEXT: [1,2] .D=eER . xorw %bx, %dx +# CHECK-NEXT: [2,0] .D==eER . addw %cx, %dx +# CHECK-NEXT: [2,1] .DeE--R . movw %ax, %dx +# CHECK-NEXT: [2,2] . DeE-R . xorw %bx, %dx +# CHECK-NEXT: [3,0] . D=eER . addw %cx, %dx +# CHECK-NEXT: [3,1] . DeE-R . movw %ax, %dx +# CHECK-NEXT: [3,2] . D=eER . xorw %bx, %dx +# CHECK-NEXT: [4,0] . D=eER. addw %cx, %dx +# CHECK-NEXT: [4,1] . DeE-R. movw %ax, %dx +# CHECK-NEXT: [4,2] . D=eER. xorw %bx, %dx +# CHECK-NEXT: [5,0] . D==eER addw %cx, %dx +# CHECK-NEXT: [5,1] . DeE-R movw %ax, %dx +# CHECK-NEXT: [5,2] . D=eER xorw %bx, %dx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 6 2.3 0.2 0.0 addw %cx, %dx +# CHECK-NEXT: 1. 6 1.0 1.0 1.0 movw %ax, %dx +# CHECK-NEXT: 2. 
6 1.8 0.0 0.2 xorw %bx, %dx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=7 < %s | FileCheck %s + +# NOTE(review): a false dependency on %bx would serialize the lzcnt behind the +# imul, but in the timeline below the lzcnt never waits for it. + +imul %ax, %bx +lzcnt %ax, %bx +add %cx, %bx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 1507 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 2.99 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx +# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx +# CHECK-NEXT: 1 1 0.25 addw %cx, %bx + +# CHECK: Resources: +# CHECK-NEXT: [0] - ZnAGU0 +# CHECK-NEXT: [1] - ZnAGU1 +# CHECK-NEXT: [2] - ZnALU0 +# CHECK-NEXT: [3] - ZnALU1 +# CHECK-NEXT: [4] - ZnALU2 +# CHECK-NEXT: [5] - ZnALU3 +# CHECK-NEXT: [6] - ZnDivider +# CHECK-NEXT: [7] - ZnFPU0 +# CHECK-NEXT: [8] - ZnFPU1 +# CHECK-NEXT: [9] - ZnFPU2 +# CHECK-NEXT: [10] - ZnFPU3 +# CHECK-NEXT: [11] - ZnMultiplier + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: - - 0.67 1.00 0.67 0.67 - - - - - 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: +# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulw %ax, %bx +# CHECK-NEXT: - - 
0.33 - 0.33 0.33 - - - - - - lzcntw %ax, %bx +# CHECK-NEXT: - - 0.33 - 0.33 0.33 - - - - - - addw %cx, %bx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeER . . imulw %ax, %bx +# CHECK-NEXT: [0,1] DeeE-R . . lzcntw %ax, %bx +# CHECK-NEXT: [0,2] D==eER . . addw %cx, %bx +# CHECK-NEXT: [1,0] D===eeeER . . imulw %ax, %bx +# CHECK-NEXT: [1,1] .DeeE---R . . lzcntw %ax, %bx +# CHECK-NEXT: [1,2] .D==eE--R . . addw %cx, %bx +# CHECK-NEXT: [2,0] .D===eeeER. . imulw %ax, %bx +# CHECK-NEXT: [2,1] .DeeE----R. . lzcntw %ax, %bx +# CHECK-NEXT: [2,2] . D=eE---R. . addw %cx, %bx +# CHECK-NEXT: [3,0] . D===eeeER . imulw %ax, %bx +# CHECK-NEXT: [3,1] . DeeE----R . lzcntw %ax, %bx +# CHECK-NEXT: [3,2] . D==eE---R . addw %cx, %bx +# CHECK-NEXT: [4,0] . D===eeeER . imulw %ax, %bx +# CHECK-NEXT: [4,1] . DeeE----R . lzcntw %ax, %bx +# CHECK-NEXT: [4,2] . D==eE---R . addw %cx, %bx +# CHECK-NEXT: [5,0] . D====eeeER. imulw %ax, %bx +# CHECK-NEXT: [5,1] . DeeE----R. lzcntw %ax, %bx +# CHECK-NEXT: [5,2] . D==eE---R. addw %cx, %bx +# CHECK-NEXT: [6,0] . D====eeeER imulw %ax, %bx +# CHECK-NEXT: [6,1] . DeeE-----R lzcntw %ax, %bx +# CHECK-NEXT: [6,2] . D=eE----R addw %cx, %bx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 7 3.9 0.7 0.0 imulw %ax, %bx +# CHECK-NEXT: 1. 7 1.0 1.0 3.6 lzcntw %ax, %bx +# CHECK-NEXT: 2. 
7 2.7 0.0 2.6 addw %cx, %bx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=8 < %s | FileCheck %s + +lzcnt %ax, %bx ## NOTE(review): no partial register stall is visible below (IPC: 3.96). + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 1500 +# CHECK-NEXT: Total Cycles: 379 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 3.96 +# CHECK-NEXT: Block RThroughput: 0.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx + +# CHECK: Resources: +# CHECK-NEXT: [0] - ZnAGU0 +# CHECK-NEXT: [1] - ZnAGU1 +# CHECK-NEXT: [2] - ZnALU0 +# CHECK-NEXT: [3] - ZnALU1 +# CHECK-NEXT: [4] - ZnALU2 +# CHECK-NEXT: [5] - ZnALU3 +# CHECK-NEXT: [6] - ZnDivider +# CHECK-NEXT: [7] - ZnFPU0 +# CHECK-NEXT: [8] - ZnFPU1 +# CHECK-NEXT: [9] - ZnFPU2 +# CHECK-NEXT: [10] - ZnFPU3 +# CHECK-NEXT: [11] - ZnMultiplier + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: +# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - lzcntw %ax, %bx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345 + +# CHECK: [0,0] DeeER. lzcntw %ax, %bx +# CHECK-NEXT: [1,0] DeeER. lzcntw %ax, %bx +# CHECK-NEXT: [2,0] DeeER. 
lzcntw %ax, %bx +# CHECK-NEXT: [3,0] DeeER. lzcntw %ax, %bx +# CHECK-NEXT: [4,0] .DeeER lzcntw %ax, %bx +# CHECK-NEXT: [5,0] .DeeER lzcntw %ax, %bx +# CHECK-NEXT: [6,0] .DeeER lzcntw %ax, %bx +# CHECK-NEXT: [7,0] .DeeER lzcntw %ax, %bx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 8 1.0 1.0 0.0 lzcntw %ax, %bx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=4 < %s | FileCheck %s + +# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the +# imul. However, the folded load can start immediately. +# The last lzcnt has a false dependency on %cx. However, even in this case, the +# folded load can start immediately. 
+ +imul %edx, %ecx +lzcnt (%rsp), %cx +lzcnt 2(%rsp), %cx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 4507 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 1.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx +# CHECK-NEXT: 2 6 0.50 * lzcntw (%rsp), %cx +# CHECK-NEXT: 2 6 0.50 * lzcntw 2(%rsp), %cx + +# CHECK: Resources: +# CHECK-NEXT: [0] - ZnAGU0 +# CHECK-NEXT: [1] - ZnAGU1 +# CHECK-NEXT: [2] - ZnALU0 +# CHECK-NEXT: [3] - ZnALU1 +# CHECK-NEXT: [4] - ZnALU2 +# CHECK-NEXT: [5] - ZnALU3 +# CHECK-NEXT: [6] - ZnDivider +# CHECK-NEXT: [7] - ZnFPU0 +# CHECK-NEXT: [8] - ZnFPU1 +# CHECK-NEXT: [9] - ZnFPU2 +# CHECK-NEXT: [10] - ZnFPU3 +# CHECK-NEXT: [11] - ZnMultiplier + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: 1.00 1.00 0.66 1.00 0.67 0.67 - - - - - 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: +# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imull %edx, %ecx +# CHECK-NEXT: - 1.00 0.33 - 0.33 0.33 - - - - - - lzcntw (%rsp), %cx +# CHECK-NEXT: 1.00 - 0.33 - 0.33 0.33 - - - - - - lzcntw 2(%rsp), %cx + +# CHECK: Timeline view: +# CHECK-NEXT: 012345678 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeER . . . imull %edx, %ecx +# CHECK-NEXT: [0,1] DeeeeeeER . . . lzcntw (%rsp), %cx +# CHECK-NEXT: [0,2] .DeeeeeeER. . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [1,0] .D======eeeER . . imull %edx, %ecx +# CHECK-NEXT: [1,1] . DeeeeeeE--R . . lzcntw (%rsp), %cx +# CHECK-NEXT: [1,2] . DeeeeeeE--R . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [2,0] . D=======eeeER . 
imull %edx, %ecx +# CHECK-NEXT: [2,1] . DeeeeeeE----R . lzcntw (%rsp), %cx +# CHECK-NEXT: [2,2] . DeeeeeeE---R . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [3,0] . D=========eeeER imull %edx, %ecx +# CHECK-NEXT: [3,1] . DeeeeeeE-----R lzcntw (%rsp), %cx +# CHECK-NEXT: [3,2] . DeeeeeeE-----R lzcntw 2(%rsp), %cx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 4 6.5 0.3 0.0 imull %edx, %ecx +# CHECK-NEXT: 1. 4 1.0 1.0 2.8 lzcntw (%rsp), %cx +# CHECK-NEXT: 2. 4 1.0 1.0 2.5 lzcntw 2(%rsp), %cx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +# An instruction that writes to a 32-bit register will not have any false +# dependence on the corresponding 64-bit register because the upper part of +# the 64-bit register is set to zero + +imulq %rax, %rcx +addl %edx, %ecx +addq %rcx, %rdx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# 
CHECK-NEXT: 2 4 1.00 imulq %rax, %rcx +# CHECK-NEXT: 1 1 0.25 addl %edx, %ecx +# CHECK-NEXT: 1 1 0.25 addq %rcx, %rdx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeeeER . imulq %rax, %rcx +# CHECK-NEXT: [0,1] D====eER. addl %edx, %ecx +# CHECK-NEXT: [0,2] D=====eER addq %rcx, %rdx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rcx +# CHECK-NEXT: 1. 1 5.0 0.0 0.0 addl %edx, %ecx +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 addq %rcx, %rdx Index: llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s +++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +imul %ax, %cx +add %al, %cl +add %ecx, %ebx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx +# CHECK-NEXT: 1 1 0.25 addb %al, %cl +# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . 
imulw %ax, %cx +# CHECK-NEXT: [0,1] D===eER. addb %al, %cl +# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx +# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx