Index: llvm/include/llvm/MCA/HardwareUnits/LSUnit.h =================================================================== --- llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -40,7 +40,10 @@ unsigned NumInstructions; unsigned NumExecuting; unsigned NumExecuted; - SmallVector<MemoryGroup *, 4> Succ; + // Successors that are in an order dependency with this group. + SmallVector<MemoryGroup *, 4> OrderSucc; + // Successors that are in a data dependency with this group. + SmallVector<MemoryGroup *, 4> DataSucc; CriticalDependency CriticalPredecessor; InstRef CriticalMemoryInstruction; @@ -55,8 +58,9 @@ NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {} MemoryGroup(MemoryGroup &&) = default; - ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; } - unsigned getNumSuccessors() const { return Succ.size(); } + unsigned getNumSuccessors() const { + return OrderSucc.size() + DataSucc.size(); + } unsigned getNumPredecessors() const { return NumPredecessors; } unsigned getNumExecutingPredecessors() const { return NumExecutingPredecessors; @@ -75,12 +79,22 @@ return CriticalPredecessor; } - void addSuccessor(MemoryGroup *Group) { + void addSuccessor(MemoryGroup *Group, bool IsDataDependent) { + // Do not need to add a dependency if there is no data + // dependency and all instructions from this group have been + // issued already. 
+ if (!IsDataDependent && isExecuting()) + return; + Group->NumPredecessors++; assert(!isExecuted() && "Should have been removed!"); if (isExecuting()) - Group->onGroupIssued(CriticalMemoryInstruction); - Succ.emplace_back(Group); + Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent); + + if (IsDataDependent) + DataSucc.emplace_back(Group); + else + OrderSucc.emplace_back(Group); } bool isWaiting() const { @@ -98,10 +112,13 @@ } bool isExecuted() const { return NumInstructions == NumExecuted; } - void onGroupIssued(const InstRef &IR) { + void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) { assert(!isReady() && "Unexpected group-start event!"); NumExecutingPredecessors++; + if (!ShouldUpdateCriticalDep) + return; + unsigned Cycles = IR.getInstruction()->getCyclesLeft(); if (CriticalPredecessor.Cycles < Cycles) { CriticalPredecessor.IID = IR.getSourceIndex(); @@ -133,8 +150,14 @@ return; // Notify successors that this group started execution. - for (MemoryGroup *MG : Succ) - MG->onGroupIssued(CriticalMemoryInstruction); + for (MemoryGroup *MG : OrderSucc) { + MG->onGroupIssued(CriticalMemoryInstruction, false); + // Release the order dependency with this group. + MG->onGroupExecuted(); + } + + for (MemoryGroup *MG : DataSucc) + MG->onGroupIssued(CriticalMemoryInstruction, true); } void onInstructionExecuted() { @@ -145,8 +168,8 @@ if (!isExecuted()) return; - // Notify successors that this group has finished execution. - for (MemoryGroup *MG : Succ) + // Notify data dependent successors that this group has finished execution. 
+ for (MemoryGroup *MG : DataSucc) MG->onGroupExecuted(); } @@ -412,6 +435,7 @@ unsigned CurrentLoadGroupID; unsigned CurrentLoadBarrierGroupID; unsigned CurrentStoreGroupID; + unsigned CurrentStoreBarrierGroupID; public: LSUnit(const MCSchedModel &SM) @@ -420,7 +444,8 @@ : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {} LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias) : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0), - CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {} + CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0), + CurrentStoreBarrierGroupID(0) {} /// Returns LSU_AVAILABLE if there are enough load/store queue entries to /// accomodate instruction IR. Index: llvm/lib/MCA/HardwareUnits/LSUnit.cpp =================================================================== --- llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -77,9 +77,6 @@ acquireSQSlot(); if (Desc.MayStore) { - // Always create a new group for store operations. - - // A store may not pass a previous store or store barrier. unsigned NewGID = createMemoryGroup(); MemoryGroup &NewGroup = getGroup(NewGID); NewGroup.addInstruction(); @@ -91,16 +88,32 @@ MemoryGroup &IDom = getGroup(ImmediateLoadDominator); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator << ") --> (" << NewGID << ")\n"); - IDom.addSuccessor(&NewGroup); + IDom.addSuccessor(&NewGroup, !assumeNoAlias()); + } + + // A store may not pass a previous store barrier. + if (CurrentStoreBarrierGroupID) { + MemoryGroup &StoreGroup = getGroup(CurrentStoreBarrierGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" + << CurrentStoreBarrierGroupID + << ") --> (" << NewGID << ")\n"); + StoreGroup.addSuccessor(&NewGroup, true); } - if (CurrentStoreGroupID) { + + // A store may not pass a previous store. 
+ if (CurrentStoreGroupID && + (CurrentStoreGroupID != CurrentStoreBarrierGroupID)) { MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID << ") --> (" << NewGID << ")\n"); - StoreGroup.addSuccessor(&NewGroup); + StoreGroup.addSuccessor(&NewGroup, !assumeNoAlias()); } + CurrentStoreGroupID = NewGID; + if (IsMemBarrier) + CurrentStoreBarrierGroupID = NewGID; + if (Desc.MayLoad) { CurrentLoadGroupID = NewGID; if (IsMemBarrier) @@ -112,31 +125,43 @@ assert(Desc.MayLoad && "Expected a load!"); - // Always create a new memory group if this is the first load of the sequence. + unsigned ImmediateLoadDominator = + std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID); + + bool ShouldCreateANewGroup = IsMemBarrier || !ImmediateLoadDominator || + ImmediateLoadDominator <= CurrentStoreGroupID || + getGroup(ImmediateLoadDominator).isExecuting(); - // A load may not pass a previous store unless flag 'NoAlias' is set. - // A load may pass a previous load. - // A younger load cannot pass a older load barrier. - // A load barrier cannot pass a older load. - bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier || - CurrentLoadGroupID <= CurrentStoreGroupID || - CurrentLoadGroupID <= CurrentLoadBarrierGroupID; if (ShouldCreateANewGroup) { unsigned NewGID = createMemoryGroup(); MemoryGroup &NewGroup = getGroup(NewGID); NewGroup.addInstruction(); + // A load may not pass a previous store or store barrier + // unless flag 'NoAlias' is set. 
 if (!assumeNoAlias() && CurrentStoreGroupID) { MemoryGroup &StGroup = getGroup(CurrentStoreGroupID); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID << ") --> (" << NewGID << ")\n"); - StGroup.addSuccessor(&NewGroup); + StGroup.addSuccessor(&NewGroup, true); } - if (CurrentLoadBarrierGroupID) { - MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID); - LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID - << ") --> (" << NewGID << ")\n"); - LdGroup.addSuccessor(&NewGroup); + + // A load barrier may not pass a previous load or load barrier. + if (IsMemBarrier) { + if (ImmediateLoadDominator) { + MemoryGroup &LdGroup = getGroup(ImmediateLoadDominator); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator + << ") --> (" << NewGID << ")\n"); + LdGroup.addSuccessor(&NewGroup, true); + } + } else { + // A younger load cannot pass an older load barrier. + if (CurrentLoadBarrierGroupID) { + MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID + << ") --> (" << NewGID << ")\n"); + LdGroup.addSuccessor(&NewGroup, true); + } } CurrentLoadGroupID = NewGID; @@ -145,6 +170,7 @@ return NewGID; } + // A load may pass a previous load. 
MemoryGroup &Group = getGroup(CurrentLoadGroupID); Group.addInstruction(); return CurrentLoadGroupID; Index: llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st1 {v0.s}[0], [sp] st1 {v0.2s}, [sp] Index: llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s 
-check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st2 {v0.s, v1.s}[0], [sp] st2 {v0.2s, v1.2s}, [sp] Index: llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st3 {v0.s, v1.s, v2.s}[0], [sp] st3 {v0.2s, v1.2s, v2.2s}, [sp] Index: llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been 
autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] Index: llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s 
-check-prefixes=ALL,M5 stur d0, [sp, #2] stur q0, [sp, #16] Index: llvm/test/tools/llvm-mca/AArch64/Exynos/store.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/store.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/store.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 stur x0, [sp, #8] strb w0, [sp], #1 Index: llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s =================================================================== --- llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s +++ llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s @@ -47,12 +47,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -72,22 +72,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - 
Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -116,16 +115,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movb %spl, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movb (%rcx), %bpl -# CHECK-NEXT: - - - - - - 0.95 0.05 movb (%rdx), %sil -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movb %dil, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movb (%rdx), %sil +# CHECK-NEXT: - - - - 1.00 - 1.00 - movb %dil, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movb %spl, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil -# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx) +# CHECK: [0,0] DeER . . movb %spl, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl +# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil +# CHECK-NEXT: [0,3] D=eE----R movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -137,19 +136,19 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 
1 2.0 2.0 0.0 movb (%rdx), %sil -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [1] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -169,22 +168,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -213,16 +211,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movw %sp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movw (%rcx), %bp -# CHECK-NEXT: - - - - - - 0.95 0.05 movw (%rdx), %si -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movw %di, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movw (%rdx), %si +# CHECK-NEXT: - - - - 1.00 - 1.00 - movw %di, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movw %sp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si -# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx) +# CHECK: [0,0] DeER . . movw %sp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp +# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si +# CHECK-NEXT: [0,3] D=eE----R movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -234,19 +232,19 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movw %di, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [2] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -266,22 +264,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -310,16 +307,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movl %esp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movl (%rcx), %ebp -# CHECK-NEXT: - - - - - - 0.95 0.05 movl (%rdx), %esi -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movl %edi, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movl (%rdx), %esi +# CHECK-NEXT: - - - - 1.00 - 1.00 - movl %edi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . 
movl %esp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi -# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx) +# CHECK: [0,0] DeER . . movl %esp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp +# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi +# CHECK-NEXT: [0,3] D=eE----R movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -331,19 +328,19 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [3] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -363,22 +360,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 
3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -407,16 +403,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movq %rsp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movq (%rcx), %rbp -# CHECK-NEXT: - - - - - - 0.95 0.05 movq (%rdx), %rsi -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movq %rdi, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movq (%rdx), %rsi +# CHECK-NEXT: - - - - 1.00 - 1.00 - movq %rdi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movq %rsp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx) +# CHECK: [0,0] DeER . . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp +# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi +# CHECK-NEXT: [0,3] D=eE----R movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -428,19 +424,19 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [4] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -460,22 +456,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -504,16 +499,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movd %mm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movd (%rcx), %mm1 -# CHECK-NEXT: - - - - - - 0.95 0.05 movd (%rdx), %mm2 -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movd %mm3, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movd (%rdx), %mm2 +# CHECK-NEXT: - - - - 1.00 - 1.00 - movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . 
. movd %mm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D=eeeeeER. movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D======eER movd %mm3, (%rbx) +# CHECK: [0,0] DeER . . movd %mm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] D=eE----R movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -525,19 +520,19 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [5] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 209 +# CHECK-NEXT: Total Cycles: 208 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.91 -# CHECK-NEXT: IPC: 1.91 +# CHECK-NEXT: uOps Per Cycle: 1.92 +# CHECK-NEXT: IPC: 1.92 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -557,22 +552,21 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.3%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 35 (16.7%) -# CHECK-NEXT: 2, 148 (70.8%) -# CHECK-NEXT: 4, 26 (12.4%) +# CHECK-NEXT: 0, 34 (16.3%) +# CHECK-NEXT: 2, 148 (71.2%) +# CHECK-NEXT: 4, 26 (12.5%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 
0, 3 (1.4%) -# CHECK-NEXT: 1, 12 (5.7%) -# CHECK-NEXT: 2, 194 (92.8%) +# CHECK-NEXT: 0, 8 (3.8%) +# CHECK-NEXT: 2, 200 (96.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -601,17 +595,16 @@ # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movaps %xmm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movaps (%rcx), %xmm1 -# CHECK-NEXT: - - - - - - 0.94 0.06 movaps (%rdx), %xmm2 -# CHECK-NEXT: - - - - 1.00 - 0.06 0.94 movaps %xmm3, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movaps (%rdx), %xmm2 +# CHECK-NEXT: - - - - 1.00 - 1.00 - movaps %xmm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeeER. movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D=======eER movaps %xmm3, (%rbx) +# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeeER. movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] D=eeeeeeER movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] D=eE-----R movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -623,5 +616,5 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 3.0 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 5.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.3 Index: llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s =================================================================== --- llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s +++ llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s @@ -135,10 +135,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) -# CHECK-NEXT: 2. 
1 3.0 0.0 0.0 movb %sil, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movb %bpl, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movb %sil, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [1] Code Region @@ -232,10 +232,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [2] Code Region @@ -329,10 +329,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [3] Code Region @@ -426,10 +426,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [4] Code Region @@ -620,7 +620,7 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx) -# CHECK-NEXT: 3. 
1 4.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movaps %xmm2, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 Index: llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -72,23 +72,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -99,8 +100,8 @@ # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -133,18 +134,18 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movb (%rcx), %bpl # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movb (%rdx), %sil -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movb %spl, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil -# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx) +# CHECK: [0,0] DeER . . movb %spl, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl +# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil +# CHECK-NEXT: [0,3] D==eE---R movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -156,8 +157,8 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [1] Code Region @@ -188,23 +189,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -215,8 +217,8 @@ # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -249,18 +251,18 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movw (%rcx), %bp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movw (%rdx), %si -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movw %sp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si -# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx) +# CHECK: [0,0] DeER . . movw %sp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp +# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si +# CHECK-NEXT: [0,3] D==eE---R movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -272,8 +274,8 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movw %di, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [2] Code Region @@ -304,23 +306,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -331,8 +334,8 @@ # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -365,18 +368,18 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movl (%rcx), %ebp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movl (%rdx), %esi -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movl %esp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi -# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx) +# CHECK: [0,0] DeER . . movl %esp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp +# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi +# CHECK-NEXT: [0,3] D==eE---R movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -388,8 +391,8 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [3] Code Region @@ -420,23 +423,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -447,8 +451,8 @@ # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -481,18 +485,18 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movq (%rcx), %rbp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movq (%rdx), %rsi -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movq %rsp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx) +# CHECK: [0,0] DeER . . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp +# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi +# CHECK-NEXT: [0,3] D==eE---R movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -504,14 +508,14 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [4] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 554 +# CHECK-NEXT: Total Cycles: 553 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -536,24 +540,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 57 (10.3%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 437 (78.9%) +# CHECK-NEXT: SQ - Store queue full: 432 (78.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 365 (65.9%) +# CHECK-NEXT: 0, 364 (65.8%) # CHECK-NEXT: 1, 88 (15.9%) -# CHECK-NEXT: 2, 3 (0.5%) -# CHECK-NEXT: 3, 86 (15.5%) -# CHECK-NEXT: 4, 12 (2.2%) +# CHECK-NEXT: 2, 4 (0.7%) +# CHECK-NEXT: 3, 84 (15.2%) +# CHECK-NEXT: 4, 13 (2.4%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 253 (45.7%) -# CHECK-NEXT: 1, 202 (36.5%) -# CHECK-NEXT: 2, 99 (17.9%) +# CHECK-NEXT: 0, 253 (45.8%) +# CHECK-NEXT: 1, 200 (36.2%) +# CHECK-NEXT: 2, 100 (18.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -599,18 +603,17 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax) -# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 -# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 +# CHECK-NEXT: 1.50 1.50 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . movd %mm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D=eeeeeER . movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx) +# CHECK: [0,0] DeeER. . movd %mm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] D===eeE-R movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -622,8 +625,8 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 4.0 1.0 1.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 2.0 1.3 0.3 # CHECK: [5] Code Region @@ -668,9 +671,9 @@ # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 104 (25.7%) -# CHECK-NEXT: 1, 202 (49.9%) -# CHECK-NEXT: 2, 99 (24.4%) +# CHECK-NEXT: 0, 105 (25.9%) +# CHECK-NEXT: 1, 200 (49.4%) +# CHECK-NEXT: 2, 100 (24.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -679,10 +682,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 37 40 40 -# CHECK-NEXT: PdFPU 37 40 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 22 24 +# CHECK-NEXT: PdEX 36 40 40 +# CHECK-NEXT: PdFPU 36 40 64 +# CHECK-NEXT: PdLoad 20 23 40 +# CHECK-NEXT: PdStore 19 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -721,12 +724,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movaps %xmm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeER. movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D======eER movaps %xmm3, (%rbx) +# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] D=eeeeeER movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] D===eE--R movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -738,5 +741,5 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 4.0 2.0 2.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.0 1.5 0.5 Index: llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s +++ llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s @@ -101,9 +101,9 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 # CHECK-NEXT: 1. 1 7.0 1.0 0.0 vmovaps %xmm0, (%rdi) # CHECK-NEXT: 2. 1 1.0 1.0 2.0 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 3. 1 8.0 1.0 0.0 vmovaps %xmm0, 16(%rdi) # CHECK-NEXT: 4. 1 3.0 3.0 0.0 vmovaps 32(%rsi), %xmm0 # CHECK-NEXT: 5. 1 9.0 1.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 3.0 3.0 2.0 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 7. 1 10.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) -# CHECK-NEXT: 1 5.3 1.3 0.5 +# CHECK-NEXT: 7. 1 10.0 1.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 5.3 1.5 0.5 Index: llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s +++ llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=104 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=204 < %s | FileCheck %s int3 stmxcsr (%rsp) @@ -27,11 +27,12 @@ # CHECK-NEXT: 2 1 18.00 * U stmxcsr (%rsp) # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 
0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123 -# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3 -# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp) +# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . int3 +# CHECK-NEXT: [0,1] D====================================================================================================eER . . . . . . . . . . . . . . . . . . . . . stmxcsr (%rsp) +# CHECK-NEXT: [1,0] D=====================================================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -159,10 +159,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movb %bpl, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movb %sil, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [1] Code Region @@ -273,10 +273,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) -# CHECK-NEXT: 3. 
1 4.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [2] Code Region @@ -387,10 +387,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [3] Code Region @@ -501,10 +501,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [4] Code Region @@ -732,10 +732,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) -# CHECK-NEXT: 2. 1 4.0 1.0 0.0 movaps %xmm2, (%rdx) -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 3.0 0.5 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx) +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 movaps %xmm2, (%rdx) +# CHECK-NEXT: 3. 1 5.0 1.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 3.0 1.3 0.0 # CHECK: [6] Code Region @@ -846,7 +846,7 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 1.0 0.0 vmovaps %ymm1, (%rcx) -# CHECK-NEXT: 2. 1 35.0 33.0 0.0 vmovaps %ymm2, (%rdx) -# CHECK-NEXT: 3. 
1 36.0 1.0 0.0 vmovaps %ymm3, (%rbx) -# CHECK-NEXT: 1 18.5 9.0 0.0 +# CHECK-NEXT: 1. 1 2.0 2.0 0.0 vmovaps %ymm1, (%rcx) +# CHECK-NEXT: 2. 1 35.0 34.0 0.0 vmovaps %ymm2, (%rdx) +# CHECK-NEXT: 3. 1 36.0 2.0 0.0 vmovaps %ymm3, (%rbx) +# CHECK-NEXT: 1 18.5 9.8 0.0 Index: llvm/test/tools/llvm-mca/X86/BtVer2/independent-load-stores.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/BtVer2/independent-load-stores.s @@ -0,0 +1,146 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1008 +# YESALIAS-NEXT: Total Cycles: 6003 + +# ALL-NEXT: Total uOps: 1000 + +# ALL: Dispatch Width: 2 + +# NOALIAS-NEXT: uOps Per Cycle: 0.99 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.17 +# YESALIAS-NEXT: IPC: 0.17 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 6 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 384(%r14) +# 
ALL-NEXT: 1 6 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - JALU0 +# ALL-NEXT: [1] - JALU1 +# ALL-NEXT: [2] - JDiv +# ALL-NEXT: [3] - JFPA +# ALL-NEXT: [4] - JFPM +# ALL-NEXT: [5] - JFPU0 +# ALL-NEXT: [6] - JFPU1 +# ALL-NEXT: [7] - JLAGU +# ALL-NEXT: [8] - JMul +# ALL-NEXT: [9] - JSAGU +# ALL-NEXT: [10] - JSTC +# ALL-NEXT: [11] - JVALU0 +# ALL-NEXT: [12] - JVALU1 +# ALL-NEXT: [13] - JVIMUL + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# ALL-NEXT: 5.00 5.00 - - - - - 10.00 - 10.00 - - - - + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 64(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 128(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 192(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 256(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 320(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 384(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 448(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 01234567 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 012 + +# NOALIAS: [0,0] DeeeeeeER . . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeER. . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeER . . 
addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeER . . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeER. . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeER . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D======eeeeeeER. . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D===========eeeeeeER . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D=================eeeeeeER . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D======================eeeeeeER . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D============================eeeeeeER . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=================================eeeeeeER. . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D=======================================eeeeeeER . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D============================================eeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . D==================================================eeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 
1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 12.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 18.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 23.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 29.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 34.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 40.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 45.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 1 51.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 26.0 0.1 0.0 Index: llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s +++ llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=104 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=204 < %s | FileCheck %s int3 stmxcsr (%rsp) @@ -27,11 +27,12 @@ # CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rsp) # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123 -# CHECK: [0,0] 
DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3 -# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp) +# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . int3 +# CHECK-NEXT: [0,1] D====================================================================================================eER . . . . . . . . . . . . . . . . . . . . . stmxcsr (%rsp) +# CHECK-NEXT: [1,0] .D====================================================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s =================================================================== --- llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s +++ llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s @@ -21,12 +21,12 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total Cycles: 24 # CHECK-NEXT: Total uOps: 16 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.59 -# CHECK-NEXT: IPC: 0.37 +# CHECK-NEXT: uOps Per Cycle: 0.67 +# CHECK-NEXT: IPC: 0.42 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -74,18 +74,18 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeeeeeeeeER . . .. xaddl %ecx, (%rsp) -# CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx -# CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx -# CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx -# CHECK-NEXT: [0,4] . D=====eeeE--R . .. imull %ecx, %ecx -# CHECK-NEXT: [1,0] . D=======eeeeeeeeeeeER.. 
xaddl %ecx, (%rsp) -# CHECK-NEXT: [1,1] . .D========eE-------R.. addl %ecx, %ecx -# CHECK-NEXT: [1,2] . .D=========eE-------R. addl %ecx, %ecx -# CHECK-NEXT: [1,3] . . D=========eeeE----R. imull %ecx, %ecx -# CHECK-NEXT: [1,4] . . D============eeeE--R imull %ecx, %ecx +# CHECK-NEXT: Index 0123456789 0123 + +# CHECK: [0,0] DeeeeeeeeeeeER . . . xaddl %ecx, (%rsp) +# CHECK-NEXT: [0,1] . D=eE-------R . . . addl %ecx, %ecx +# CHECK-NEXT: [0,2] . D==eE-------R. . . addl %ecx, %ecx +# CHECK-NEXT: [0,3] . D==eeeE----R. . . imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D=====eeeE--R . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D====eeeeeeeeeeeER . xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . .D=====eE-------R . addl %ecx, %ecx +# CHECK-NEXT: [1,2] . .D======eE-------R. addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D======eeeE----R. imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=========eeeE--R imull %ecx, %ecx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -94,12 +94,12 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 4.5 0.5 0.0 xaddl %ecx, (%rsp) -# CHECK-NEXT: 1. 2 5.5 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx -# CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx -# CHECK-NEXT: 2 6.5 0.1 4.0 +# CHECK-NEXT: 0. 2 3.0 0.5 0.0 xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 4.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 5.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 5.0 0.0 4.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 
2 8.0 0.0 2.0 imull %ecx, %ecx +# CHECK-NEXT: 2 5.0 0.1 4.0 # CHECK: [1] Code Region Index: llvm/test/tools/llvm-mca/X86/Haswell/independent-load-stores.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/Haswell/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mcpu=haswell -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mcpu=haswell -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 4 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - HWDivider +# 
ALL-NEXT: [1] - HWFPDivider +# ALL-NEXT: [2] - HWPort0 +# ALL-NEXT: [3] - HWPort1 +# ALL-NEXT: [4] - HWPort2 +# ALL-NEXT: [5] - HWPort3 +# ALL-NEXT: [6] - HWPort4 +# ALL-NEXT: [7] - HWPort5 +# ALL-NEXT: [8] - HWPort6 +# ALL-NEXT: [9] - HWPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + +# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] .DeeeeeeeER . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] . DeeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] . DeeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . DeeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . DeeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . .DeeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . . DeeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . . DeeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . . 
DeeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] .D======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] . D============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] . D==================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D==============================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . .D====================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . . D==========================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . . D================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . . D======================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 1.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 1.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 1.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 1.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 1.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 1.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 1.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 1.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 1.0 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 
1 13.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 19.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 25.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 31.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 37.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 43.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 49.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 1 55.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 28.0 0.1 0.0 Index: llvm/test/tools/llvm-mca/X86/SkylakeClient/independent-load-stores.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/SkylakeClient/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 6 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 
1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - SKLDivider +# ALL-NEXT: [1] - SKLFPDivider +# ALL-NEXT: [2] - SKLPort0 +# ALL-NEXT: [3] - SKLPort1 +# ALL-NEXT: [4] - SKLPort2 +# ALL-NEXT: [5] - SKLPort3 +# ALL-NEXT: [6] - SKLPort4 +# ALL-NEXT: [7] - SKLPort5 +# ALL-NEXT: [8] - SKLPort6 +# ALL-NEXT: [9] - SKLPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + +# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . 
addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . D===========================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 
1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 1 60.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 30.5 0.1 0.0 Index: llvm/test/tools/llvm-mca/X86/SkylakeServer/independent-load-stores.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/SkylakeServer/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# 
ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 6 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - SKXDivider +# ALL-NEXT: [1] - SKXFPDivider +# ALL-NEXT: [2] - SKXPort0 +# ALL-NEXT: [3] - SKXPort1 +# ALL-NEXT: [4] - SKXPort2 +# ALL-NEXT: [5] - SKXPort3 +# ALL-NEXT: [6] - SKXPort4 +# ALL-NEXT: [7] - SKXPort5 +# ALL-NEXT: [8] - SKXPort6 +# ALL-NEXT: [9] - SKXPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 
448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + +# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . 
D===========================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 1 60.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 30.5 0.1 0.0