Index: llvm/lib/CodeGen/MacroFusion.cpp
===================================================================
--- llvm/lib/CodeGen/MacroFusion.cpp
+++ llvm/lib/CodeGen/MacroFusion.cpp
@@ -36,6 +36,26 @@
   return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
 }
 
+/// Returns the unit at the other end of the first cluster edge found in
+/// \p SU's predecessor list, or nullptr if no predecessor edge is a cluster.
+static SUnit *getPredClusterSU(const SUnit &SU) {
+  for (const SDep &SI : SU.Preds)
+    if (SI.isCluster())
+      return SI.getSUnit();
+
+  return nullptr;
+}
+
+/// Returns the unit at the other end of the first cluster edge found in
+/// \p SU's successor list, or nullptr if no successor edge is a cluster.
+static SUnit *getSuccClusterSU(const SUnit &SU) {
+  for (const SDep &SI : SU.Succs)
+    if (SI.isCluster())
+      return SI.getSUnit();
+
+  return nullptr;
+}
+
 static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
@@ -73,27 +93,34 @@
 
   // Make data dependencies from the FirstSU also dependent on the SecondSU to
   // prevent them from being scheduled between the FirstSU and the SecondSU.
-  if (&SecondSU != &DAG.ExitSU)
+  SUnit *CurrentSU = &SecondSU;
+  while (CurrentSU && CurrentSU != &DAG.ExitSU) {
     for (const SDep &SI : FirstSU.Succs) {
       SUnit *SU = SI.getSUnit();
       if (SI.isWeak() || isHazard(SI) ||
-          SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
+          SU == &DAG.ExitSU || SU == CurrentSU ||
+          SU->isPred(CurrentSU))
         continue;
-      LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU);
+      LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*CurrentSU);
                  dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
-      DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
+      DAG.addEdge(SU, SDep(CurrentSU, SDep::Artificial));
     }
 
+    CurrentSU = getSuccClusterSU(*CurrentSU);
+  }
+
   // Make the FirstSU also dependent on the dependencies of the SecondSU to
   // prevent them from being scheduled between the FirstSU and the SecondSU.
-  if (&FirstSU != &DAG.EntrySU) {
+  CurrentSU = &FirstSU;
+  while (CurrentSU && CurrentSU != &DAG.EntrySU) {
     for (const SDep &SI : SecondSU.Preds) {
       SUnit *SU = SI.getSUnit();
-      if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
+      if (SI.isWeak() || isHazard(SI) || CurrentSU == SU ||
+          CurrentSU->isSucc(SU))
         continue;
       LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
-                 DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
-      DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
+                 DAG.dumpNodeName(*CurrentSU); dbgs() << '\n';);
+      DAG.addEdge(CurrentSU, SDep(SU, SDep::Artificial));
     }
     // ExitSU comes last by design, which acts like an implicit dependency
     // between ExitSU and any bottom root in the graph. We should transfer
@@ -101,9 +128,11 @@
     if (&SecondSU == &DAG.ExitSU) {
       for (SUnit &SU : DAG.SUnits) {
         if (SU.Succs.empty())
-          DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial));
+          DAG.addEdge(CurrentSU, SDep(&SU, SDep::Artificial));
       }
     }
+
+    CurrentSU = getPredClusterSU(*CurrentSU);
   }
 
   ++NumFused;
Index: llvm/test/CodeGen/AArch64/macro-fusion.ll
===================================================================
--- llvm/test/CodeGen/AArch64/macro-fusion.ll
+++ llvm/test/CodeGen/AArch64/macro-fusion.ll
@@ -10,6 +10,7 @@
 ; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]])
 ; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]])
 ; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]])
+; CHECK: Bind SU([[SU0]]) - SU([[SU4]])
 ; CHECK: SU([[SU0]]): %{{[0-9]+}}:gpr32 = COPY $w3
 ; CHECK: SU([[SU1]]): %{{[0-9]+}}:gpr32 = COPY $w2
 ; CHECK: SU([[SU4]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr