Index: include/llvm/MCA/HardwareUnits/Scheduler.h =================================================================== --- include/llvm/MCA/HardwareUnits/Scheduler.h +++ include/llvm/MCA/HardwareUnits/Scheduler.h @@ -67,22 +67,6 @@ /// resources. This class is also responsible for tracking the progress of /// instructions from the dispatch stage, until the write-back stage. /// -/// An instruction dispatched to the Scheduler is initially placed into either -/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input -/// operands. -/// -/// An instruction is moved from the WaitSet to the ReadySet when register -/// operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitSet to the ReadySet transition -/// in state from 'IS_DISPATCHED' to 'IS_READY'. -/// -/// On every cycle, the Scheduler checks if it can promote instructions from the -/// WaitSet to the ReadySet. -/// -/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued -/// to a (one or more) pipeline(s). This event also causes an instruction state -/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction -/// leaves the IssuedSet when it reaches the write-back stage. class Scheduler : public HardwareUnit { LSUnit &LSU; @@ -92,7 +76,38 @@ // Hardware resources that are managed by this scheduler. std::unique_ptr Resources; + // Instructions dispatched to the Scheduler are internally classified based on + // the instruction stage (see Instruction::InstrStage). + // + // An Instruction dispatched to the Scheduler is added to the WaitSet if not + // all its register operands are available, and at least one latency is unknown. + // By construction, the WaitSet only contains instructions that are in the + // IS_DISPATCHED stage. + // + // An Instruction transitions from the WaitSet to the PendingSet if the + // instruction is not ready yet, but the latency of every register read is known. 
+ // Instructions in the PendingSet are expected to be in the IS_PENDING stage. + // + // Instructions in the PendingSet are immediately dominated only by + // instructions that have already been issued to the underlying pipelines. + // In the presence of bottlenecks caused by data dependencies, the PendingSet + // can be inspected to identify problematic data dependencies between + // instructions. + // + // An instruction is moved to the ReadySet when all register operands become + // available, and all memory dependencies are met. Instructions that are + // moved from the PendingSet to the ReadySet transition in state from + // 'IS_PENDING' to 'IS_READY'. + // + // On every cycle, the Scheduler checks if it can promote instructions from the + // PendingSet to the ReadySet. + // + // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts + // execution. This event also causes an instruction state transition (i.e. from + // state IS_READY, to state IS_EXECUTING). An Instruction leaves the IssuedSet + // only when it reaches the write-back stage. std::vector WaitSet; + std::vector PendingSet; std::vector ReadySet; std::vector IssuedSet; @@ -118,9 +133,14 @@ // vector 'Executed'. void updateIssuedSet(SmallVectorImpl &Executed); - // Try to promote instructions from WaitSet to ReadySet. + // Try to promote instructions from the PendingSet to the ReadySet. // Add promoted instructions to the 'Ready' vector in input. - void promoteToReadySet(SmallVectorImpl &Ready); + // Returns true if at least one instruction was promoted. + bool promoteToReadySet(SmallVectorImpl &Ready); + + // Try to promote instructions from the WaitSet to the PendingSet. + // Returns true if at least one instruction was promoted. 
+ bool promoteToPendingSet(); public: Scheduler(const MCSchedModel &Model, LSUnit &Lsu) Index: include/llvm/MCA/Instruction.h =================================================================== --- include/llvm/MCA/Instruction.h +++ include/llvm/MCA/Instruction.h @@ -168,6 +168,14 @@ bool clearsSuperRegisters() const { return ClearsSuperRegs; } bool isWriteZero() const { return WritesZero; } bool isEliminated() const { return IsEliminated; } + + bool isReady() const { + if (getDependentWrite()) + return false; + unsigned CyclesLeft = getDependentWriteCyclesLeft(); + return !CyclesLeft || CyclesLeft < getLatency(); + } + bool isExecuted() const { return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0; } @@ -239,6 +247,7 @@ unsigned getRegisterID() const { return RegisterID; } unsigned getRegisterFileID() const { return PRFID; } + bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; } bool isReady() const { return IsReady; } bool isImplicitRead() const { return RD->isImplicitRead(); } @@ -411,6 +420,7 @@ enum InstrStage { IS_INVALID, // Instruction in an invalid state. IS_DISPATCHED, // Instruction dispatched but operands are not ready. + IS_PENDING, // Instruction is not ready, but operand latency is known. IS_READY, // Instruction dispatched and operands ready. IS_EXECUTING, // Instruction issued. IS_EXECUTED, // Instruction executed. Values are written back. @@ -444,15 +454,18 @@ // all the definitions. void execute(); - // Force a transition from the IS_DISPATCHED state to the IS_READY state if - // input operands are all ready. State transitions normally occur at the - // beginning of a new cycle (see method cycleEvent()). However, the scheduler - // may decide to promote instructions from the wait queue to the ready queue - // as the result of another issue event. This method is called every time the - // instruction might have changed in state. + // Force a transition from the IS_DISPATCHED state to the IS_READY or + // IS_PENDING state. 
State transitions normally occur either at the beginning + // of a new cycle (see method cycleEvent()), or as a result of another issue + // event. This method is called every time the instruction might have changed + // in state. It internally delegates to method updateDispatched() and + // updatePending(). void update(); + bool updateDispatched(); + bool updatePending(); bool isDispatched() const { return Stage == IS_DISPATCHED; } + bool isPending() const { return Stage == IS_PENDING; } bool isReady() const { return Stage == IS_READY; } bool isExecuting() const { return Stage == IS_EXECUTING; } bool isExecuted() const { return Stage == IS_EXECUTED; } Index: lib/MCA/HardwareUnits/Scheduler.cpp =================================================================== --- lib/MCA/HardwareUnits/Scheduler.cpp +++ lib/MCA/HardwareUnits/Scheduler.cpp @@ -96,15 +96,15 @@ // other dependent instructions. Dependent instructions may be issued during // this same cycle if operands have ReadAdvance entries. Promote those // instructions to the ReadySet and notify the caller that those are ready. - if (HasDependentUsers) + if (HasDependentUsers && promoteToPendingSet()) promoteToReadySet(ReadyInstructions); } -void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { +bool Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { // Scan the set of waiting instructions and promote them to the - // ready queue if operands are all ready. - unsigned RemovedElements = 0; - for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + // ready set if operands are all ready. + unsigned PromotedElements = 0; + for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) { InstRef &IR = *I; if (!IR) break; @@ -113,7 +113,7 @@ // a transition in state using method 'update()'. Instruction &IS = *IR.getInstruction(); if (!IS.isReady()) - IS.update(); + IS.updatePending(); // Check if there are still unsolved data dependencies. 
if (!isReady(IR)) { @@ -121,15 +121,49 @@ continue; } + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " promoted to the READY set.\n"); + Ready.emplace_back(IR); ReadySet.emplace_back(IR); IR.invalidate(); + ++PromotedElements; + std::iter_swap(I, E - PromotedElements); + } + + PendingSet.resize(PendingSet.size() - PromotedElements); + return PromotedElements; +} + +bool Scheduler::promoteToPendingSet() { + // Scan the set of waiting instructions and promote them to the + // pending set if the latency of every register operand is known. + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + + // Check if this instruction can leave the IS_DISPATCHED stage. If so, + // force a transition in state using method 'updateDispatched()'. + Instruction &IS = *IR.getInstruction(); + if (IS.isDispatched() && !IS.updateDispatched()) { + ++I; + continue; + } + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " promoted to the PENDING set.\n"); + + PendingSet.emplace_back(IR); + + IR.invalidate(); ++RemovedElements; std::iter_swap(I, E - RemovedElements); } WaitSet.resize(WaitSet.size() - RemovedElements); + return RemovedElements; } InstRef Scheduler::select() { @@ -193,9 +227,13 @@ updateIssuedSet(Executed); + for (InstRef &IR : PendingSet) + IR.getInstruction()->cycleEvent(); + for (InstRef &IR : WaitSet) IR.getInstruction()->cycleEvent(); + promoteToPendingSet(); promoteToReadySet(Ready); BusyResourceUnits = 0; @@ -220,6 +258,13 @@ if (IsMemOp) LSU.dispatch(IR); + if (IR.getInstruction()->isPending()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR + << " to the PendingSet\n"); + PendingSet.push_back(IR); + return; + } + if (!isReady(IR)) { LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); WaitSet.push_back(IR); Index: lib/MCA/Instruction.cpp =================================================================== --- lib/MCA/Instruction.cpp +++ lib/MCA/Instruction.cpp @@ -151,30 +151,54 @@ 
Stage = IS_EXECUTED; } -void Instruction::update() { - assert(isDispatched() && "Unexpected instruction stage found!"); +bool Instruction::updatePending() { + assert(isPending() && "Unexpected instruction stage found!"); if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) - return; + return false; // A partial register write cannot complete before a dependent write. - auto IsDefReady = [&](const WriteState &Def) { - if (!Def.getDependentWrite()) { - unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); - return !CyclesLeft || CyclesLeft < getLatency(); - } + auto IsDefReady = [](const WriteState &Def) { return Def.isReady(); }; + + if (!all_of(getDefs(), IsDefReady)) + return false; + + Stage = IS_READY; + return true; +} + +bool Instruction::updateDispatched() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!all_of(getUses(), [](const ReadState &Use) { + return Use.isPending() || Use.isReady(); + })) return false; + + // A partial register write cannot complete before a dependent write. + auto IsDefPendingOrReady = [&](const WriteState &Def) { + return !Def.getDependentWrite(); }; - if (all_of(getDefs(), IsDefReady)) - Stage = IS_READY; + if (!all_of(getDefs(), IsDefPendingOrReady)) + return false; + + Stage = IS_PENDING; + return true; +} + +void Instruction::update() { + if (isDispatched()) + updateDispatched(); + if (isPending()) + updatePending(); } void Instruction::cycleEvent() { if (isReady()) return; - if (isDispatched()) { + if (isDispatched() || isPending()) { for (ReadState &Use : getUses()) Use.cycleEvent();