diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -291,9 +291,14 @@ return NextGroupID++; } - // Instruction executed event handlers. virtual void onInstructionExecuted(const InstRef &IR); + // Loads are tracked by the LDQ (load queue) from dispatch until completion. + // Stores are tracked by the STQ (store queue) from dispatch until commitment. + // By default we conservatively assume that the LDQ receives a load at + // dispatch. Loads leave the LDQ at retirement stage. + virtual void onInstructionRetired(const InstRef &IR); + virtual void onInstructionIssued(const InstRef &IR) { unsigned GroupID = IR.getInstruction()->getLSUTokenID(); Groups[GroupID]->onInstructionIssued(IR); @@ -438,9 +443,6 @@ /// 6. A store has to wait until an older store barrier is fully executed. unsigned dispatch(const InstRef &IR) override; - // FIXME: For simplicity, we optimistically assume a similar behavior for - // store instructions. In practice, store operations don't tend to leave the - // store queue until they reach the 'Retired' stage (See PR39830). void onInstructionExecuted(const InstRef &IR) override; }; diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -16,6 +16,7 @@ #ifndef LLVM_MCA_RETIRE_STAGE_H #define LLVM_MCA_RETIRE_STAGE_H +#include "llvm/MCA/HardwareUnits/LSUnit.h" #include "llvm/MCA/HardwareUnits/RegisterFile.h" #include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/MCA/Stages/Stage.h" @@ -27,13 +28,14 @@ // Owner will go away when we move listeners/eventing to the stages. 
RetireControlUnit &RCU; RegisterFile &PRF; + LSUnitBase &LSU; RetireStage(const RetireStage &Other) = delete; RetireStage &operator=(const RetireStage &Other) = delete; public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} + RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) + : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp --- a/llvm/lib/MCA/Context.cpp +++ b/llvm/lib/MCA/Context.cpp @@ -44,7 +44,7 @@ *RCU, *PRF); auto Execute = std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = std::make_unique<RetireStage>(*RCU, *PRF); + auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp --- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -160,17 +160,19 @@ } void LSUnitBase::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Expected a memory operation!"); - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); It->second->onInstructionExecuted(); - if (It->second->isExecuted()) { + if (It->second->isExecuted()) Groups.erase(It); - } +} + +void LSUnitBase::onInstructionRetired(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Expected a memory operation!"); if (IsALoad) { releaseLQSlot(); diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp 
b/llvm/lib/MCA/Stages/RetireStage.cpp --- a/llvm/lib/MCA/Stages/RetireStage.cpp +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -52,6 +52,10 @@ llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); + // Release the load/store queue entries. + if (Inst.isMemOp()) + LSU.onInstructionRetired(IR); + for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -507,12 +507,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 593 +# CHECK-NEXT: Total Cycles: 554 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.67 +# CHECK-NEXT: uOps Per Cycle: 0.72 +# CHECK-NEXT: IPC: 0.72 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -532,24 +532,24 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 187 (31.5%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 342 (57.7%) +# CHECK-NEXT: SQ - Store queue full: 437 (78.9%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 403 (68.0%) -# CHECK-NEXT: 1, 90 (15.2%) -# CHECK-NEXT: 2, 2 (0.3%) -# CHECK-NEXT: 3, 86 (14.5%) -# CHECK-NEXT: 4, 12 (2.0%) +# CHECK-NEXT: 0, 365 (65.9%) +# CHECK-NEXT: 1, 88 (15.9%) +# CHECK-NEXT: 2, 3 (0.5%) +# CHECK-NEXT: 3, 86 (15.5%) +# CHECK-NEXT: 
4, 12 (2.2%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 292 (49.2%) -# CHECK-NEXT: 1, 202 (34.1%) -# CHECK-NEXT: 2, 99 (16.7%) +# CHECK-NEXT: 0, 253 (45.7%) +# CHECK-NEXT: 1, 202 (36.5%) +# CHECK-NEXT: 2, 99 (17.9%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -595,8 +595,8 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax) -# CHECK-NEXT: 0.36 2.64 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 -# CHECK-NEXT: 2.64 0.36 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 +# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 +# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s @@ -80,7 +80,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -102,9 +102,9 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -193,7 +193,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -215,9 +215,9 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -306,7 +306,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -328,9 +328,9 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -419,7 +419,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -441,9 +441,9 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -532,7 +532,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 532 (87.9%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -554,8 +554,8 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 34 38 40 -# CHECK-NEXT: PdFPU 34 38 64 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 # CHECK-NEXT: PdLoad 37 40 40 # CHECK-NEXT: PdStore 0 0 24 @@ -646,7 +646,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 532 (87.9%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -668,8 +668,8 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 34 38 40 -# CHECK-NEXT: PdFPU 34 38 64 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 # CHECK-NEXT: PdLoad 37 40 40 # CHECK-NEXT: PdStore 0 0 24 @@ -760,7 +760,7 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 344 (56.9%) +# CHECK-NEXT: LQ - Load queue full: 345 (57.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -781,9 +781,9 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 33 38 40 -# CHECK-NEXT: PdFPU 33 38 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -81,14 +81,13 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -103,10 +102,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -195,14 +194,13 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -217,10 +215,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -309,14 +307,13 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -331,10 +328,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -423,14 +420,13 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -445,10 +441,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -537,7 +533,7 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 747 (93.0%) +# CHECK-NEXT: SQ - Store queue full: 748 (93.2%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: @@ -559,10 +555,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 -# CHECK-NEXT: PdFPU 22 23 64 +# CHECK-NEXT: PdEX 21 23 40 +# CHECK-NEXT: PdFPU 21 23 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 24 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -650,16 +646,17 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 185 (30.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 372 (61.8%) +# CHECK-NEXT: SQ - Store queue full: 559 (92.9%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 223 (37.0%) -# CHECK-NEXT: 1, 372 (61.8%) -# CHECK-NEXT: 4, 7 (1.2%) +# CHECK-NEXT: 0, 222 (36.9%) +# CHECK-NEXT: 1, 373 (62.0%) +# CHECK-NEXT: 3, 1 (0.2%) +# CHECK-NEXT: 4, 6 (1.0%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] @@ -673,10 +670,10 @@ # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 24 40 -# CHECK-NEXT: PdFPU 22 24 64 +# CHECK-NEXT: PdEX 21 23 40 +# CHECK-NEXT: PdFPU 21 23 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 24 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -763,9 +760,9 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 5963 (83.2%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 5777 (80.6%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 374 (5.2%) +# CHECK-NEXT: SQ - Store queue full: 561 (7.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: