diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1581,16 +1581,32 @@ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" << SUb->NodeNum << ")\n"); - // Copy successor edges from SUa to SUb. Interleaving computation - // dependent on SUa can prevent load combining due to register reuse. - // Predecessor edges do not need to be copied from SUb to SUa since nearby - // loads should have effectively the same inputs. - for (const SDep &Succ : SUa->Succs) { - if (Succ.getSUnit() == SUb) - continue; - LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum - << ")\n"); - DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + if (IsLoad) { + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since + // nearby loads should have effectively the same inputs. + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) + continue; + LLVM_DEBUG(dbgs() + << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + } + } else { + // Copy predecessor edges from SUb to SUa to avoid the SUnits that + // SUb dependent on scheduled in-between SUb and SUa. Successor edges + // do not need to be copied from SUa to SUb since no one will depend + // on stores. + // Notice that, we don't need to care about the memory dependency as + // we won't try to cluster them if they have any memory dependency. + for (const SDep &Pred : SUb->Preds) { + if (Pred.getSUnit() == SUa) + continue; + LLVM_DEBUG(dbgs() + << " Copy Pred SU(" << Pred.getSUnit()->NodeNum << ")\n"); + DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial)); + } } ++ClusterLength; } else