Index: include/llvm/CodeGen/TargetPassConfig.h
===================================================================
--- include/llvm/CodeGen/TargetPassConfig.h
+++ include/llvm/CodeGen/TargetPassConfig.h
@@ -22,6 +22,7 @@
 
 class PassConfigImpl;
 class ScheduleDAGInstrs;
+class ScheduleDAGMutation;
 class TargetMachine;
 struct MachineSchedContext;
 
@@ -251,6 +252,15 @@
     return nullptr;
   }
 
+  /// When EnableMacroFusion is true, create a target-defined MacroFusion
+  /// mutation for the default scheduler DAG. A non-null result is owned by
+  /// the caller, which wraps it in a std::unique_ptr before adding it.
+  /// Return nullptr (the default here) to select the default MacroFusion.
+  virtual ScheduleDAGMutation *
+  createMacroFusion(ScheduleDAGInstrs *DAG) const {
+    return nullptr;
+  }
+
   /// printAndVerify - Add a pass to dump then verify the machine function, if
   /// those steps are enabled.
   ///
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -3119,8 +3119,12 @@
     if (DAG->TII->enableClusterStores())
       DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
   }
-  if (EnableMacroFusion)
-    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+  if (EnableMacroFusion) {
+    if (ScheduleDAGMutation *Fusion = C->PassConfig->createMacroFusion(DAG))
+      DAG->addMutation(std::unique_ptr<ScheduleDAGMutation>(Fusion));
+    else
+      DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+  }
   return DAG;
 }
 
Index: lib/CodeGen/PostRASchedulerList.cpp
===================================================================
--- lib/CodeGen/PostRASchedulerList.cpp
+++ lib/CodeGen/PostRASchedulerList.cpp
@@ -122,6 +122,9 @@
     /// added to the AvailableQueue.
     std::vector<SUnit*> PendingQueue;
 
+    /// Record the next node in a scheduled cluster.
+    SUnit *NextClusterSucc;
+
     /// HazardRec - The hazard recognizer to use.
     ScheduleHazardRecognizer *HazardRec;
 
@@ -208,8 +211,8 @@
     const RegisterClassInfo &RCI,
     TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
     SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
-    : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
-
+    : ScheduleDAGInstrs(MF, &MLI), NextClusterSucc(nullptr), AA(AA), EndIndex(0)
+{
   const InstrItineraryData *InstrItins =
       MF.getSubtarget().getInstrItineraryData();
   HazardRec =
@@ -461,6 +464,11 @@
 
   if (SuccEdge->isWeak()) {
     --SuccSU->WeakPredsLeft;
+    // Clustered instructions get higher scheduling priority.
+    // If SuccSU is not blocked by any other predecessor, let the scheduler
+    // pick SuccSU as the next instruction to schedule.
+    if (SuccEdge->isCluster() && SuccSU->NumPredsLeft == 0)
+      NextClusterSucc = SuccSU;
     return;
   }
 #ifndef NDEBUG
@@ -550,54 +558,77 @@
   // stall or emit a noop, depending on the target.
   bool CycleHasInsts = false;
 
+  assert(!NextClusterSucc && "Incorrect scheduling state.");
+
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back.  Schedule the node.
   std::vector<SUnit*> NotReady;
   Sequence.reserve(SUnits.size());
   while (!AvailableQueue.empty() || !PendingQueue.empty()) {
-    // Check to see if any of the pending instructions are ready to issue.  If
-    // so, add them to the available queue.
-    unsigned MinDepth = ~0u;
-    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
-      if (PendingQueue[i]->getDepth() <= CurCycle) {
-        AvailableQueue.push(PendingQueue[i]);
-        PendingQueue[i]->isAvailable = true;
-        PendingQueue[i] = PendingQueue.back();
-        PendingQueue.pop_back();
-        --i; --e;
-      } else if (PendingQueue[i]->getDepth() < MinDepth)
-        MinDepth = PendingQueue[i]->getDepth();
-    }
-
-    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
-
     SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
     bool HasNoopHazards = false;
-    while (!AvailableQueue.empty()) {
-      SUnit *CurSUnit = AvailableQueue.pop();
-
-      ScheduleHazardRecognizer::HazardType HT =
-        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
-      if (HT == ScheduleHazardRecognizer::NoHazard) {
-        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
-          if (!NotPreferredSUnit) {
-            // If this is the first non-preferred node for this cycle, then
-            // record it and continue searching for a preferred node. If this
-            // is not the first non-preferred node, then treat it as though
-            // there had been a hazard.
-            NotPreferredSUnit = CurSUnit;
-            continue;
+
+    // Clustered instructions get higher scheduling priority because the
+    // processor can work efficiently when they are scheduled close together,
+    // so if a clustered successor has been recorded, try to schedule it first.
+    if (NextClusterSucc) {
+      // Wait until the cycle count is high enough and there is no hazard.
+      if (NextClusterSucc->getDepth() <= CurCycle &&
+          HazardRec->getHazardType(NextClusterSucc, 0/*no stalls*/) ==
+          ScheduleHazardRecognizer::NoHazard) {
+        for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i)
+          if (PendingQueue[i] == NextClusterSucc) {
+            PendingQueue.erase(PendingQueue.begin() + i);
+            break;
           }
-        } else {
-          FoundSUnit = CurSUnit;
-          break;
-        }
+
+        FoundSUnit = NextClusterSucc;
+        NextClusterSucc->isAvailable = true;
+        NextClusterSucc = nullptr;
+      }
+    } else {
+      // Check to see if any of the pending instructions are ready to issue.  If
+      // so, add them to the available queue.
+      unsigned MinDepth = ~0u;
+      for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+        if (PendingQueue[i]->getDepth() <= CurCycle) {
+          AvailableQueue.push(PendingQueue[i]);
+          PendingQueue[i]->isAvailable = true;
+          PendingQueue[i] = PendingQueue.back();
+          PendingQueue.pop_back();
+          --i; --e;
+        } else if (PendingQueue[i]->getDepth() < MinDepth)
+          MinDepth = PendingQueue[i]->getDepth();
       }
 
-      // Remember if this is a noop hazard.
-      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+      DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+      while (!AvailableQueue.empty()) {
+        SUnit *CurSUnit = AvailableQueue.pop();
+
+        ScheduleHazardRecognizer::HazardType HT =
+          HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+        if (HT == ScheduleHazardRecognizer::NoHazard) {
+          if (HazardRec->ShouldPreferAnother(CurSUnit)) {
+            if (!NotPreferredSUnit) {
+              // If this is the first non-preferred node for this cycle, then
+              // record it and continue searching for a preferred node. If this
+              // is not the first non-preferred node, then treat it as though
+              // there had been a hazard.
+              NotPreferredSUnit = CurSUnit;
+              continue;
+            }
+          } else {
+            FoundSUnit = CurSUnit;
+            break;
+          }
+        }
+
+        // Remember if this is a noop hazard.
+        HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
 
-      NotReady.push_back(CurSUnit);
+        NotReady.push_back(CurSUnit);
+      }
     }
 
     // If we have a non-preferred node, push it back onto the available list.