Index: lib/Target/AMDGPU/SIMachineScheduler.h
===================================================================
--- lib/Target/AMDGPU/SIMachineScheduler.h
+++ lib/Target/AMDGPU/SIMachineScheduler.h
@@ -54,6 +54,11 @@
 class SIScheduleDAGMI;
 class SIScheduleBlockCreator;
 
+enum SIScheduleBlockLinkKind {
+  NoData,
+  Data
+};
+
 class SIScheduleBlock {
   SIScheduleDAGMI *DAG;
   SIScheduleBlockCreator *BC;
@@ -92,7 +97,8 @@
   unsigned ID;
 
   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
-  std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
+  // All blocks successors, and the kind of link
+  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
   unsigned NumHighLatencySuccessors = 0;
 
 public:
@@ -112,10 +118,11 @@
 
   // Add block pred, which has instruction predecessor of SU.
   void addPred(SIScheduleBlock *Pred);
-  void addSucc(SIScheduleBlock *Succ);
+  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
 
   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
-  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+  ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
+    getSuccs() const { return Succs; }
 
   unsigned Height;  // Maximum topdown path length to block without outputs
   unsigned Depth;   // Maximum bottomup path length to block without inputs
Index: lib/Target/AMDGPU/SIMachineScheduler.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -542,21 +542,30 @@
   Preds.push_back(Pred);
 
   assert(none_of(Succs,
-                 [=](SIScheduleBlock *S) { return PredID == S->getID(); }) &&
+                 [=](std::pair<SIScheduleBlock*,
+                               SIScheduleBlockLinkKind> S) {
+                   return PredID == S.first->getID();
+                 }) &&
          "Loop in the Block Graph!");
 }
 
-void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
+                              SIScheduleBlockLinkKind Kind) {
   unsigned SuccID = Succ->getID();
 
   // Check if not already predecessor.
-  for (SIScheduleBlock* S : Succs) {
-    if (SuccID == S->getID())
+  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
+    if (SuccID == S.first->getID()) {
+      if (S.second == SIScheduleBlockLinkKind::NoData &&
+          Kind == SIScheduleBlockLinkKind::Data)
+        S.second = Kind;
       return;
+    }
   }
   if (Succ->isHighLatencyBlock())
     ++NumHighLatencySuccessors;
-  Succs.push_back(Succ);
+  Succs.push_back(std::make_pair(Succ, Kind));
+
   assert(none_of(Preds,
                  [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
          "Loop in the Block Graph!");
@@ -576,8 +585,10 @@
   }
 
   dbgs() << "\nSuccessors:\n";
-  for (SIScheduleBlock* S : Succs) {
-    S->printDebug(false);
+  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
+    if (S.second == SIScheduleBlockLinkKind::Data)
+      dbgs() << "(Data Dep) ";
+    S.first->printDebug(false);
   }
 
   if (Scheduled) {
@@ -1159,7 +1170,8 @@
       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
         continue;
       if (Node2CurrentBlock[Succ->NodeNum] != SUID)
-        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
+                                     SuccDep.isCtrl() ? NoData : Data);
     }
     for (SDep& PredDep : SU->Preds) {
       SUnit *Pred = PredDep.getSUnit();
@@ -1353,9 +1365,10 @@
       Block->Height = 0;
     else {
       unsigned Height = 0;
-      for (SIScheduleBlock *Succ : Block->getSuccs()) {
-        if (Height < Succ->Height + 1)
-          Height = Succ->Height + 1;
+      for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> Succ :
+               Block->getSuccs()) {
+        if (Height < Succ.first->Height + 1)
+          Height = Succ.first->Height + 1;
       }
       Block->Height = Height;
     }
@@ -1671,17 +1684,16 @@
 }
 
 void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
-  for (SIScheduleBlock* Block : Parent->getSuccs()) {
-    --BlockNumPredsLeft[Block->getID()];
-    if (BlockNumPredsLeft[Block->getID()] == 0) {
-      ReadyBlocks.push_back(Block);
+  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> Block :
+         Parent->getSuccs()) {
+    --BlockNumPredsLeft[Block.first->getID()];
+    if (BlockNumPredsLeft[Block.first->getID()] == 0) {
+      ReadyBlocks.push_back(Block.first);
     }
-    // TODO: Improve check. When the dependency between the high latency
-    // instructions and the instructions of the other blocks are WAR or WAW
-    // there will be no wait triggered. We would like these cases to not
-    // update LastPosHighLatencyParentScheduled.
-    if (Parent->isHighLatencyBlock())
-      LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+
+    if (Parent->isHighLatencyBlock() &&
+        Block.second == SIScheduleBlockLinkKind::Data)
+      LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
   }
 }
 
@@ -2042,7 +2054,7 @@
       BlockInfo.ProducedRegisters.insert(RegIdentifier);
     }
 
-    for (std::pair Child :
+    for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> Child :
          Block->getSuccs()) {
       --BlockNumPredsLeftCurrent[Child.first->getID()];
       if (BlockNumPredsLeftCurrent[Child.first->getID()] == 0) {