diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -691,6 +691,12 @@ std::vector<SUnit> &SUnits; SUnit *ExitSU; + // Have any new nodes been added? + bool Dirty = false; + + // Outstanding added edges, that have not been applied to the ordering. + SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates; + /// Maps topological index to the node number. std::vector<int> Index2Node; /// Maps the node number to its topological index. @@ -734,11 +740,24 @@ /// added from SUnit \p X to SUnit \p Y. void AddPred(SUnit *Y, SUnit *X); + /// Queues an update to the topological ordering to accommodate an edge to + /// be added from SUnit \p X to SUnit \p Y. + void AddPredQueued(SUnit *Y, SUnit *X); + /// Updates the topological ordering to accommodate an edge to be /// removed from the specified node \p N from the predecessors of the /// current node \p M. void RemovePred(SUnit *M, SUnit *N); + /// Fix the ordering, by either recomputing from scratch or by applying + /// any outstanding updates. Uses a heuristic to estimate what will be + /// cheaper. + void FixOrder(); + + /// Mark the ordering as temporarily broken, after a new node has been + /// added. + void MarkDirty() { Dirty = true; } + typedef std::vector<int>::iterator iterator; typedef std::vector<int>::const_iterator const_iterator; iterator begin() { return Index2Node.begin(); } diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -461,6 +461,11 @@ // On insertion of the edge X->Y, the algorithm first marks by calling DFS // the nodes reachable from Y, and then shifts them using Shift to lie // immediately after X in Index2Node. + + // Cancel pending updates, mark as valid.
+ Dirty = false; + Updates.clear(); + unsigned DAGSize = SUnits.size(); std::vector<SUnit*> WorkList; WorkList.reserve(DAGSize); @@ -514,6 +519,31 @@ #endif } +void ScheduleDAGTopologicalSort::FixOrder() { + // Recompute from scratch after new nodes have been added. + if (Dirty) { + InitDAGTopologicalSorting(); + return; + } + + // Otherwise apply updates one-by-one. + for (auto &U : Updates) + AddPred(U.first, U.second); + Updates.clear(); +} + +void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) { + // Recomputing the order from scratch is likely more efficient than applying + // updates one-by-one for too many updates. The current cut-off is arbitrarily + // chosen. + Dirty = Dirty || Updates.size() > 10; + + if (Dirty) + return; + + Updates.emplace_back(Y, X); +} + void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; LowerBound = Node2Index[Y->NodeNum]; @@ -683,6 +713,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, const SUnit *TargetSU) { + assert(!Dirty && Updates.empty() && "Topological order is outdated"); // If insertion of the edge SU->TargetSU would create a cycle // then there is a path from TargetSU to SU. int UpperBound, LowerBound; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -213,12 +213,23 @@ return Topo.IsReachable(SU, TargetSU); } + /// FixOrder - Apply outstanding updates to topological ordering. + void FixOrder() { return Topo.FixOrder(); } + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will /// create a cycle. bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { return Topo.WillCreateCycle(SU, TargetSU); } + /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor.
+ /// Does *NOT* update the topological ordering! It just queues an update. + void AddPredQueued(SUnit *SU, const SDep &D) { + Topo.AddPredQueued(SU, D.getSUnit()); + SU->addPred(D); + } + /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. /// Updates the topological ordering if required. @@ -266,24 +277,22 @@ void ListScheduleBottomUp(); /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. - /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } /// CreateClone - Creates a new SUnit from an existing one. - /// Updates the topological ordering if required. SUnit *CreateClone(SUnit *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = Clone(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } @@ -365,7 +374,7 @@ BuildSchedGraph(nullptr); LLVM_DEBUG(dump()); - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); AvailableQueue->initNodes(SUnits); @@ -1017,8 +1026,9 @@ NewSU = &SUnits[N->getNodeId()]; // If NewSU has already been scheduled, we need to clone it, but this // negates the benefit to unfolding so just return SU. 
- if (NewSU->isScheduled) + if (NewSU->isScheduled) { return SU; + } isNewN = false; } else { NewSU = CreateNewSUnit(N); @@ -1071,23 +1081,23 @@ for (const SDep &Pred : ChainPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); } for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); D.setSUnit(NewSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); // Balance register pressure. if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) @@ -1099,7 +1109,7 @@ RemovePred(SuccDep, D); if (isNewLoad) { D.setSUnit(LoadSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); } } @@ -1107,7 +1117,7 @@ // by LoadSU. SDep D(LoadSU, SDep::Data, 0); D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); + AddPredQueued(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1179,7 +1189,7 @@ // New SUnit has the exact same predecessors. for (SDep &Pred : SU->Preds) if (!Pred.isArtificial()) - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. @@ -1191,7 +1201,7 @@ if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(NewSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); D.setSUnit(SU); DelDeps.push_back(std::make_pair(SuccSU, D)); } @@ -1230,14 +1240,14 @@ if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(CopyToSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, Succ)); } else { // Avoid scheduling the def-side copy before other successors. 
Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. - AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); + AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (auto &DelDep : DelDeps) @@ -1245,10 +1255,10 @@ SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); - AddPred(CopyFromSU, FromDep); + AddPredQueued(CopyFromSU, FromDep); SDep ToDep(CopyFromSU, SDep::Data, 0); ToDep.setLatency(CopyFromSU->Latency); - AddPred(CopyToSU, ToDep); + AddPredQueued(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1478,6 +1488,13 @@ if (CurSU) return CurSU; + // We query the topological order in the loop body, so make sure outstanding + // updates are applied before entering it (we only enter the loop if there + // are some interferences). If we make changes to the ordering, we exit + // the loop. + if (!Interferences.empty()) + Topo.FixOrder(); + // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies // to resolve it. @@ -1507,7 +1524,7 @@ } LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" << TrySU->NodeNum << ")\n"); - AddPred(TrySU, SDep(BtSU, SDep::Artificial)); + AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer available. 
@@ -1561,14 +1578,14 @@ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); + AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Artificial)); + AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -3000,6 +3017,7 @@ if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) goto outer_loop_continue; // Don't introduce graph cycles. + scheduleDAG->FixOrder(); if (scheduleDAG->IsReachable(&SU, PredSuccSU)) goto outer_loop_continue; } @@ -3017,9 +3035,9 @@ if (SuccSU != &SU) { Edge.setSUnit(PredSU); scheduleDAG->RemovePred(SuccSU, Edge); - scheduleDAG->AddPred(&SU, Edge); + scheduleDAG->AddPredQueued(&SU, Edge); Edge.setSUnit(&SU); - scheduleDAG->AddPred(SuccSU, Edge); + scheduleDAG->AddPredQueued(SuccSU, Edge); --i; } } @@ -3093,6 +3111,7 @@ SuccOpc == TargetOpcode::INSERT_SUBREG || SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; + scheduleDAG->FixOrder(); if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && (!canClobber(SuccSU, DUSU) || (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || @@ -3101,7 +3120,7 @@ LLVM_DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); + scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); } } }