Record, for every successor link between scheduling blocks, whether the link
carries a data dependency (SIScheduleBlockLinkKind::Data) or not (NoData).
releaseBlockSuccs() then updates LastPosHighLatencyParentScheduled only for
successors reached through a Data link from a high latency parent.
Block heights remain the maximum over all successors of (successor Height + 1).

Index: llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -54,6 +54,11 @@
 class SIScheduleDAGMI;
 class SIScheduleBlockCreator;
 
+enum SIScheduleBlockLinkKind {
+  NoData,
+  Data
+};
+
 class SIScheduleBlock {
   SIScheduleDAGMI *DAG;
   SIScheduleBlockCreator *BC;
@@ -92,7 +97,8 @@
   unsigned ID;
 
   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
-  std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
+  // All blocks successors, and the kind of link
+  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
   unsigned NumHighLatencySuccessors = 0;
 
 public:
@@ -112,10 +118,11 @@
 
   // Add block pred, which has instruction predecessor of SU.
   void addPred(SIScheduleBlock *Pred);
-  void addSucc(SIScheduleBlock *Succ);
+  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
 
   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
-  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+  ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
+    getSuccs() const { return Succs; }
 
   unsigned Height;  // Maximum topdown path length to block without outputs
   unsigned Depth;   // Maximum bottomup path length to block without inputs
Index: llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -539,21 +539,30 @@
   Preds.push_back(Pred);
 
   assert(none_of(Succs,
-                 [=](SIScheduleBlock *S) { return PredID == S->getID(); }) &&
+                 [=](std::pair<SIScheduleBlock*,
+                     SIScheduleBlockLinkKind> S) {
+                   return PredID == S.first->getID();
+                 }) &&
          "Loop in the Block Graph!");
 }
 
-void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
+                              SIScheduleBlockLinkKind Kind) {
   unsigned SuccID = Succ->getID();
 
   // Check if not already predecessor.
-  for (SIScheduleBlock* S : Succs) {
-    if (SuccID == S->getID())
+  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
+    if (SuccID == S.first->getID()) {
+      if (S.second == SIScheduleBlockLinkKind::NoData &&
+          Kind == SIScheduleBlockLinkKind::Data)
+        S.second = Kind;
       return;
+    }
   }
   if (Succ->isHighLatencyBlock())
     ++NumHighLatencySuccessors;
-  Succs.push_back(Succ);
+  Succs.push_back(std::make_pair(Succ, Kind));
+
   assert(none_of(Preds,
                  [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
          "Loop in the Block Graph!");
@@ -573,8 +582,10 @@
   }
 
   dbgs() << "\nSuccessors:\n";
-  for (SIScheduleBlock* S : Succs) {
-    S->printDebug(false);
+  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
+    if (S.second == SIScheduleBlockLinkKind::Data)
+      dbgs() << "(Data Dep) ";
+    S.first->printDebug(false);
   }
 
   if (Scheduled) {
@@ -1096,7 +1107,8 @@
       if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
         continue;
       if (Node2CurrentBlock[Succ->NodeNum] != SUID)
-        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
+                                     SuccDep.isCtrl() ? NoData : Data);
     }
     for (SDep& PredDep : SU->Preds) {
       SUnit *Pred = PredDep.getSUnit();
@@ -1290,10 +1302,8 @@
       Block->Height = 0;
     else {
       unsigned Height = 0;
-      for (SIScheduleBlock *Succ : Block->getSuccs()) {
-        if (Height < Succ->Height + 1)
-          Height = Succ->Height + 1;
-      }
+      for (const auto &Succ : Block->getSuccs())
+        Height = std::max(Height, Succ.first->Height + 1);
       Block->Height = Height;
     }
   }
@@ -1574,17 +1584,13 @@
 }
 
 void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
-  for (SIScheduleBlock* Block : Parent->getSuccs()) {
-    --BlockNumPredsLeft[Block->getID()];
-    if (BlockNumPredsLeft[Block->getID()] == 0) {
-      ReadyBlocks.push_back(Block);
-    }
-    // TODO: Improve check. When the dependency between the high latency
-    // instructions and the instructions of the other blocks are WAR or WAW
-    // there will be no wait triggered. We would like these cases to not
-    // update LastPosHighLatencyParentScheduled.
-    if (Parent->isHighLatencyBlock())
-      LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+  for (const auto &Block : Parent->getSuccs()) {
+    if (--BlockNumPredsLeft[Block.first->getID()] == 0)
+      ReadyBlocks.push_back(Block.first);
+
+    if (Parent->isHighLatencyBlock() &&
+        Block.second == SIScheduleBlockLinkKind::Data)
+      LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
   }
 }
 