Index: lib/Target/AMDGPU/SIMachineScheduler.h =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.h +++ lib/Target/AMDGPU/SIMachineScheduler.h @@ -449,6 +449,7 @@ LiveIntervals *getLIS() { return LIS; } MachineRegisterInfo *getMRI() { return &MRI; } const TargetRegisterInfo *getTRI() { return TRI; } + ScheduleDAGTopologicalSort *GetTopo() { return &Topo; } SUnit& getEntrySU() { return EntrySU; } SUnit& getExitSU() { return ExitSU; } Index: lib/Target/AMDGPU/SIMachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.cpp +++ lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -655,7 +655,7 @@ unsigned DAGSize = DAG->SUnits.size(); unsigned NumHighLatencies = 0; unsigned GroupSize; - unsigned Color = NextReservedID; + int Color = NextReservedID; unsigned Count = 0; std::set FormingGroup; @@ -676,34 +676,107 @@ GroupSize = 4; for (unsigned i = 0, e = DAGSize; i != e; ++i) { - SUnit *SU = &DAG->SUnits[i]; - if (DAG->IsHighLatencySU[SU->NodeNum]) { + const SUnit &SU = DAG->SUnits[i]; + if (DAG->IsHighLatencySU[SU.NodeNum]) { unsigned CompatibleGroup = true; - unsigned ProposedColor = Color; + int ProposedColor = Color; + std::vector AdditionalElements; + + // We don't want to put in the same block + // two high latency instructions that depend + // on each other. + // One way would be to check canAddEdge + // in both directions, but that currently is not + // enough because there the high latency order is + // enforced (via links). + // Instead of look at the dependencies between the + // high latency instructions and deduce if it is + // a data dependency or not. for (unsigned j : FormingGroup) { - // TODO: Currently CompatibleGroup will always be false, - // because the graph enforces the load order. This - // can be fixed, but as keeping the load order is often - // good for performance that causes a performance hit (both - // the default scheduler and this scheduler). - // When this scheduler determines a good load order, - // this can be fixed. - if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) || - !DAG->canAddEdge(&DAG->SUnits[j], SU)) + bool HasSubGraph; + // We find a path from one of the SU to the other + // Note that there can be only one successful GetSubGraph (no + // circular dependency). + std::vector SubGraph = + DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, HasSubGraph); + if (!HasSubGraph) + SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], + HasSubGraph); + if (!HasSubGraph) + continue; // No dependencies between each other + else if (SubGraph.size() > 5) { + // Too many elements would be required to be added to the block. CompatibleGroup = false; + break; + } + else { + // Check the type of dependency + for (unsigned k : SubGraph) { + // If in the path to join the two instructions, + // there is another high latency instruction, + // or instructions colored for another block + // abort the merge. + if (DAG->IsHighLatencySU[k] || + (CurrentColoring[k] != ProposedColor && + CurrentColoring[k] != 0)) { + CompatibleGroup = false; + break; + } + for (SDep& PredDep : (&DAG->SUnits[k])->Preds) { + // We don't want any instruction which directly depend on + // another high latency of the same group. + // We do allow only order dependencies (WAR and WAW). + if (PredDep.getSUnit()->NodeNum == j && + PredDep.getKind() == llvm::SDep::Data) { + CompatibleGroup = false; + break; + } + } + if (!CompatibleGroup) + break; + } + // Same check for the SU + for (const SDep& PredDep : SU.Preds) { + if (PredDep.getSUnit()->NodeNum == j && + PredDep.getKind() == llvm::SDep::Data) { + CompatibleGroup = false; + break; + } + } + if (!CompatibleGroup) + break; + // Add all the required instructions to the block + // These cannot live in another block (because they + // depend (order dependency) on one of the + // instruction in the block, and are required for the + // high latency instruction we add. + AdditionalElements.insert(AdditionalElements.end(), + SubGraph.begin(), SubGraph.end()); + } } - if (!CompatibleGroup || ++Count == GroupSize) { + if (CompatibleGroup) { + FormingGroup.insert(SU.NodeNum); + for (unsigned j : AdditionalElements) + CurrentColoring[j] = ProposedColor; + CurrentColoring[SU.NodeNum] = ProposedColor; + ++Count; + } + // Found one incompatible instruction, + // or has filled a big enough group. + // -> start a new one. + if (!CompatibleGroup) { FormingGroup.clear(); Color = ++NextReservedID; - if (!CompatibleGroup) { - ProposedColor = Color; - FormingGroup.insert(SU->NodeNum); - } + ProposedColor = Color; + FormingGroup.insert(SU.NodeNum); + CurrentColoring[SU.NodeNum] = ProposedColor; + Count = 0; + } else if (Count == GroupSize) { + FormingGroup.clear(); + Color = ++NextReservedID; + ProposedColor = Color; Count = 0; - } else { - FormingGroup.insert(SU->NodeNum); } - CurrentColoring[SU->NodeNum] = ProposedColor; } } }