diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1581,16 +1581,32 @@
         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
       LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
                         << SUb->NodeNum << ")\n");
-      // Copy successor edges from SUa to SUb. Interleaving computation
-      // dependent on SUa can prevent load combining due to register reuse.
-      // Predecessor edges do not need to be copied from SUb to SUa since nearby
-      // loads should have effectively the same inputs.
-      for (const SDep &Succ : SUa->Succs) {
-        if (Succ.getSUnit() == SUb)
-          continue;
-        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
-                          << ")\n");
-        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+      if (IsLoad) {
+        // Copy successor edges from SUa to SUb. Interleaving computation
+        // dependent on SUa can prevent load combining due to register reuse.
+        // Predecessor edges do not need to be copied from SUb to SUa since
+        // nearby loads should have effectively the same inputs.
+        for (const SDep &Succ : SUa->Succs) {
+          if (Succ.getSUnit() == SUb)
+            continue;
+          LLVM_DEBUG(dbgs()
+                     << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+          DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+        }
+      } else {
+        // Copy predecessor edges from SUb to SUa so that the SUnits SUb
+        // depends on are not scheduled in between SUb and SUa. Successor
+        // edges do not need to be copied from SUa to SUb since nothing
+        // depends on stores.
+        // Note that we don't need to care about memory dependencies, as we
+        // won't try to cluster them if they have any memory dependency.
+        for (const SDep &Pred : SUb->Preds) {
+          if (Pred.getSUnit() == SUa)
+            continue;
+          LLVM_DEBUG(dbgs()
+                     << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum << ")\n");
+          DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
+        }
       }
       ++ClusterLength;
     } else