Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -195,7 +195,7 @@ defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>; defm : X86WriteRes<WriteCMPXCHG, [JALU01], 3, [3], 5>; defm : X86WriteRes<WriteCMPXCHGRMW, [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>; -defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>; +defm : X86WriteRes<WriteXCHG, [JALU01], 1, [2], 2>; defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>; defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>; @@ -395,6 +395,96 @@ NOT8m, NOT16m, NOT32m, NOT64m, NEG8m, NEG16m, NEG32m, NEG64m)>; +def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> { + let Latency = 2; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} +def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr, + XADD32rr, XADD64rr)>; + +// This write defines the latency of the in/out register operand of a non-atomic +// XADDrm. This is the first of a pair of writes that model non-atomic +// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part). +// +// We need two writes because the instruction latency differs from the output +// register operand latency. In particular, the first write describes the first +// (and only) output register operand of the instruction. However, the +// instruction latency is set to the MAX of all the write latencies. That's why +// a second write is needed in this case (see example below). +// +// Example: +// XADD %ecx, (%rsp) ## Instruction latency: 11cy +// ## ECX write Latency: 3cy +// +// Register ECX becomes available in 3 cycles. That is because the value of ECX +// is exchanged with the value read from memory at (%rsp), and the load-to-use +// latency is assumed to be 3cy.
+def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 3; // load-to-use latency + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XADDrm. This is the first of a sequence of two writes used to model atomic +// XADD instructions. The second write of the sequence is +// JWriteXCHGrm_LdSt_Part, which is also the second write of XCHGrm. +// +// Example: +// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy +// ## ECX write Latency: 11cy +// +// The value of ECX becomes available only after 11cy from the start of +// execution. This write is used to specifically set that operand latency. +def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; // in/out register operand latency + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XCHGrm. This write is the first of a sequence of two writes that describe +// atomic XCHG operations. We need two writes because the instruction latency +// differs from the output register write latency. We want to make sure that +// the output register operand becomes visible after 11cy. However, we want to +// set the instruction latency to 16cy (see JWriteXCHGrm_LdSt_Part).
+def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; // output register operand latency + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 11; // non-atomic XADDrm instruction latency + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; +} + +def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 16; // XCHGrm / LOCK XADDrm instruction latency + let ResourceCycles = [16, 16]; + let NumMicroOps = 1; +} + +def JWriteXADDrm_Part1 : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_XADDrm_XCHG_Part]>, + SchedVar<NoSchedPred, [JWriteXADDrm_XCHG_Part]> +]>; + +def JWriteXADDrm_Part2 : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteXCHGrm_LdSt_Part]>, + SchedVar<NoSchedPred, [JWriteXADDrm_LdSt_Part]> +]>; + +def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd], + (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm, + LXADD8, LXADD16, LXADD32, LXADD64)>; + +def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd], + (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>; + + //////////////////////////////////////////////////////////////////////////////// // Floating point. This covers both scalar and vector operations.
//////////////////////////////////////////////////////////////////////////////// Index: test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s +++ test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s @@ -1871,33 +1871,33 @@ # CHECK-NEXT: 1 4 1.00 * testq %rsi, (%rax) # CHECK-NEXT: 1 100 0.50 * U ud2 # CHECK-NEXT: 1 100 0.50 U wrmsr -# CHECK-NEXT: 1 1 0.50 xaddb %bl, %cl -# CHECK-NEXT: 1 4 1.00 * * xaddb %bl, (%rcx) -# CHECK-NEXT: 1 4 1.00 * * lock xaddb %bl, (%rcx) -# CHECK-NEXT: 1 1 0.50 xaddw %bx, %cx -# CHECK-NEXT: 1 4 1.00 * * xaddw %ax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xaddw %ax, (%rbx) -# CHECK-NEXT: 1 1 0.50 xaddl %ebx, %ecx -# CHECK-NEXT: 1 4 1.00 * * xaddl %eax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xaddl %eax, (%rbx) -# CHECK-NEXT: 1 1 0.50 xaddq %rbx, %rcx -# CHECK-NEXT: 1 4 1.00 * * xaddq %rax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xaddq %rax, (%rbx) -# CHECK-NEXT: 1 1 0.50 xchgb %bl, %cl -# CHECK-NEXT: 1 4 1.00 * * xchgb %bl, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xchgb %bl, (%rbx) -# CHECK-NEXT: 1 1 0.50 xchgw %bx, %ax -# CHECK-NEXT: 1 1 0.50 xchgw %bx, %cx -# CHECK-NEXT: 1 4 1.00 * * xchgw %ax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xchgw %ax, (%rbx) -# CHECK-NEXT: 1 1 0.50 xchgl %ebx, %eax -# CHECK-NEXT: 1 1 0.50 xchgl %ebx, %ecx -# CHECK-NEXT: 1 4 1.00 * * xchgl %eax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xchgl %eax, (%rbx) -# CHECK-NEXT: 1 1 0.50 xchgq %rbx, %rax -# CHECK-NEXT: 1 1 0.50 xchgq %rbx, %rcx -# CHECK-NEXT: 1 4 1.00 * * xchgq %rax, (%rbx) -# CHECK-NEXT: 1 4 1.00 * * lock xchgq %rax, (%rbx) +# CHECK-NEXT: 3 2 1.50 xaddb %bl, %cl +# CHECK-NEXT: 4 11 1.50 * * xaddb %bl, (%rcx) +# CHECK-NEXT: 4 16 16.00 * * lock xaddb %bl, (%rcx) +# CHECK-NEXT: 3 2 1.50 xaddw %bx, %cx +# CHECK-NEXT: 4 11 1.50 * * xaddw %ax, (%rbx) +# CHECK-NEXT: 4 16 16.00 * * lock xaddw %ax, (%rbx) +# CHECK-NEXT: 3 2 1.50 xaddl %ebx, %ecx +# 
CHECK-NEXT: 4 11 1.50 * * xaddl %eax, (%rbx) +# CHECK-NEXT: 4 16 16.00 * * lock xaddl %eax, (%rbx) +# CHECK-NEXT: 3 2 1.50 xaddq %rbx, %rcx +# CHECK-NEXT: 4 11 1.50 * * xaddq %rax, (%rbx) +# CHECK-NEXT: 4 16 16.00 * * lock xaddq %rax, (%rbx) +# CHECK-NEXT: 3 2 1.50 xchgb %bl, %cl +# CHECK-NEXT: 3 16 16.00 * * xchgb %bl, (%rbx) +# CHECK-NEXT: 3 16 16.00 * * lock xchgb %bl, (%rbx) +# CHECK-NEXT: 2 1 1.00 xchgw %bx, %ax +# CHECK-NEXT: 2 1 1.00 xchgw %bx, %cx +# CHECK-NEXT: 3 16 16.00 * * xchgw %ax, (%rbx) +# CHECK-NEXT: 3 16 16.00 * * lock xchgw %ax, (%rbx) +# CHECK-NEXT: 2 1 1.00 xchgl %ebx, %eax +# CHECK-NEXT: 2 1 1.00 xchgl %ebx, %ecx +# CHECK-NEXT: 3 16 16.00 * * xchgl %eax, (%rbx) +# CHECK-NEXT: 3 16 16.00 * * lock xchgl %eax, (%rbx) +# CHECK-NEXT: 2 1 1.00 xchgq %rbx, %rax +# CHECK-NEXT: 2 1 1.00 xchgq %rbx, %rcx +# CHECK-NEXT: 3 16 16.00 * * xchgq %rax, (%rbx) +# CHECK-NEXT: 3 16 16.00 * * lock xchgq %rax, (%rbx) # CHECK-NEXT: 1 3 1.00 * xlatb # CHECK-NEXT: 1 1 0.50 xorb $7, %al # CHECK-NEXT: 1 1 0.50 xorb $7, %dil @@ -1959,7 +1959,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 702.50 752.50 380.00 - - - - 812.00 64.00 713.00 - - - - +# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 64.00 893.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -2804,33 +2804,33 @@ # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - testq %rsi, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - ud2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - wrmsr -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xaddb %bl, %cl -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xaddb %bl, (%rcx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xaddb %bl, (%rcx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xaddw %bx, %cx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xaddw %ax, (%rbx) 
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xaddw %ax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xaddl %ebx, %ecx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xaddl %eax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xaddl %eax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xaddq %rbx, %rcx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xaddq %rax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xaddq %rax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgb %bl, %cl -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xchgb %bl, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xchgb %bl, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgw %bx, %ax -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgw %bx, %cx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xchgw %ax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xchgw %ax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgl %ebx, %eax -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgl %ebx, %ecx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xchgl %eax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xchgl %eax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgq %rbx, %rax -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xchgq %rbx, %rcx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - xchgq %rax, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock xchgq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - xaddb %bl, %cl +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - xaddb %bl, (%rcx) +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock xaddb %bl, (%rcx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - xaddw %bx, %cx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - xaddw %ax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock 
xaddw %ax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - xaddl %ebx, %ecx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - xaddl %eax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock xaddl %eax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - xaddq %rbx, %rcx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - xaddq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock xaddq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - xchgb %bl, %cl +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - xchgb %bl, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - lock xchgb %bl, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgw %bx, %ax +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgw %bx, %cx +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - xchgw %ax, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - lock xchgw %ax, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgl %ebx, %eax +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgl %ebx, %ecx +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - xchgl %eax, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - lock xchgl %eax, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgq %rbx, %rax +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - xchgq %rbx, %rcx +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - xchgq %rax, (%rbx) +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 - 16.00 - - - - lock xchgq %rax, (%rbx) # CHECK-NEXT: - - - - - - - 1.00 - - - - - - xlatb # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xorb $7, %al # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - xorb $7, %dil Index: test/tools/llvm-mca/X86/BtVer2/xadd.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/xadd.s +++ test/tools/llvm-mca/X86/BtVer2/xadd.s @@ -0,0 +1,184 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=2 -timeline < %s | FileCheck %s + +# LLVM-MCA-BEGIN +xadd %ecx, (%rsp) +add %ecx, %ecx +add %ecx, %ecx +imul %ecx, %ecx +imul %ecx, %ecx +# LLVM-MCA-END + +# LLVM-MCA-BEGIN +lock xadd %ecx, (%rsp) +add %ecx, %ecx +add %ecx, %ecx +imul %ecx, %ecx +imul %ecx, %ecx +# LLVM-MCA-END + +# CHECK: [0] Code Region + +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total uOps: 20 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.74 +# CHECK-NEXT: IPC: 0.37 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 4 11 1.50 * * xaddl %ecx, (%rsp) +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.50 4.50 - - - - - 1.00 2.00 1.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - xaddl %ecx, (%rsp) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - addl %ecx, %ecx +# 
CHECK-NEXT: - 1.00 - - - - - - - - - - - - addl %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeeeeeeeeeER . . .. xaddl %ecx, (%rsp) +# CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx +# CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx +# CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D====eeeE--R . .. imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D======eeeeeeeeeeeER.. xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . . D=======eE-------R.. addl %ecx, %ecx +# CHECK-NEXT: [1,2] . . D========eE-------R. addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D========eeeE----R. imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D==========eeeE--R imull %ecx, %ecx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 4.0 0.5 0.0 xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 5.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 6.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 6.0 0.0 4.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 
2 8.0 0.0 2.0 imull %ecx, %ecx + +# CHECK: [1] Code Region + +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 38 +# CHECK-NEXT: Total uOps: 20 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.53 +# CHECK-NEXT: IPC: 0.26 +# CHECK-NEXT: Block RThroughput: 16.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 4 16 16.00 * * lock xaddl %ecx, (%rsp) +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.50 4.50 - - - - - 16.00 2.00 16.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock xaddl %ecx, (%rsp) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - addl %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - addl %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 01234567 +# CHECK-NEXT: Index 0123456789 0123456789 + +# CHECK: [0,0] DeeeeeeeeeeeeeeeeER . . . . . 
lock xaddl %ecx, (%rsp) +# CHECK-NEXT: [0,1] . D=========eE----R . . . . . addl %ecx, %ecx +# CHECK-NEXT: [0,2] . D==========eE----R. . . . . addl %ecx, %ecx +# CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . lock xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . . D====================eE----R. . addl %ecx, %ecx +# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 6.5 0.5 0.0 lock xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 15.5 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 
2 18.5 0.0 0.0 imull %ecx, %ecx Index: test/tools/llvm-mca/X86/BtVer2/xchg.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/xchg.s +++ test/tools/llvm-mca/X86/BtVer2/xchg.s @@ -0,0 +1,89 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=2 -timeline < %s | FileCheck %s + +xchg %ecx, (%rsp) +add %ecx, %ecx +add %ecx, %ecx +imul %ecx, %ecx +imul %ecx, %ecx + +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 38 +# CHECK-NEXT: Total uOps: 18 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.47 +# CHECK-NEXT: IPC: 0.26 +# CHECK-NEXT: Block RThroughput: 16.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 3 16 16.00 * * xchgl %ecx, (%rsp) +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.00 4.00 - - - - - 16.00 2.00 16.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: 1.00 1.00 - - - - - 16.00 
- 16.00 - - - - xchgl %ecx, (%rsp) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - addl %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - addl %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %ecx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 01234567 +# CHECK-NEXT: Index 0123456789 0123456789 + +# CHECK: [0,0] DeeeeeeeeeeeeeeeeER . . . . . xchgl %ecx, (%rsp) +# CHECK-NEXT: [0,1] .D==========eE----R . . . . . addl %ecx, %ecx +# CHECK-NEXT: [0,2] . D==========eE----R. . . . . addl %ecx, %ecx +# CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . xchgl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . .D=====================eE----R. . addl %ecx, %ecx +# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 6.5 0.5 0.0 xchgl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 16.5 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 2 18.5 0.0 0.0 imull %ecx, %ecx