Index: include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- include/llvm/CodeGen/MachineScheduler.h
+++ include/llvm/CodeGen/MachineScheduler.h
@@ -214,9 +214,17 @@
   /// This has to be enabled in combination with shouldTrackPressure().
   virtual bool shouldTrackLaneMasks() const { return false; }
 
+  /// If this method returns true, the scheduling regions of an MBB (an MBB
+  /// is split into regions at scheduling boundaries) are handled beginning
+  /// with the topmost region of the MBB. The default is false.
+  virtual bool doMBBSchedRegionsTopDown() const { return false; }
+
   /// Initialize the strategy after building the DAG for a new region.
   virtual void initialize(ScheduleDAGMI *DAG) = 0;
 
+  /// Tell the strategy that the current MBB is done. Does nothing by default.
+  virtual void leaveMBB(MachineBasicBlock *MBB) {}
+
   /// Notify this strategy that all roots have been released (including those
   /// that depend on EntrySU or ExitSU).
   virtual void registerRoots() {}
@@ -284,6 +292,13 @@
   // Provide a vtable anchor
   ~ScheduleDAGMI() override;
 
+  /// Forwards the query to the scheduling strategy: if this returns true,
+  /// the scheduling regions of an MBB (an MBB is split into regions at
+  /// scheduling boundaries) are handled beginning with the topmost region.
+  bool doMBBSchedRegionsTopDown() const override {
+    return SchedImpl->doMBBSchedRegionsTopDown();
+  }
+
   // Returns LiveIntervals instance for use in DAG mutators and such.
   LiveIntervals *getLIS() const { return LIS; }
 
@@ -326,6 +341,8 @@
   /// reorderable instructions.
   void schedule() override;
 
+  void finishBlock() override;
+
   /// Change the position of an instruction within the basic block and update
   /// live ranges and region boundary iterators.
   void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
Index: include/llvm/CodeGen/ScheduleDAGInstrs.h
===================================================================
--- include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -275,6 +275,11 @@
     /// Returns an existing SUnit for this MI, or nullptr.
     SUnit *getSUnit(MachineInstr *MI) const;
 
+    /// If this method returns true, the scheduling regions of an MBB (an MBB
+    /// is split into regions at scheduling boundaries) are handled beginning
+    /// with the topmost region of the MBB. The default is false.
+    virtual bool doMBBSchedRegionsTopDown() const { return false; }
+
     /// Prepares to perform scheduling in the given block.
     virtual void startBlock(MachineBasicBlock *BB);
 
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -405,6 +405,7 @@
 
   // Initialize the context of the pass.
   MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
   PassConfig = &getAnalysis<TargetPassConfig>();
 
   if (VerifyScheduling)
@@ -437,11 +438,80 @@
   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
 }
 
+/// One scheduling region of an MBB, described by its bounding iterators.
+struct SchedRegion {
+  MachineBasicBlock::iterator RegionBegin;
+  MachineBasicBlock::iterator RegionEnd;
+  unsigned NumRegionInstrs;
+  SchedRegion(MachineBasicBlock::iterator Begin,
+              MachineBasicBlock::iterator End, unsigned NumInstrs)
+      : RegionBegin(Begin), RegionEnd(End), NumRegionInstrs(NumInstrs) {}
+};
+
+/// A vector holding the scheduling regions of a single MBB. reset() selects
+/// the first region to visit and nextRegion() the following ones, top-down or
+/// bottom-up depending on TopDown; both return end() when nothing is left.
+typedef std::vector<SchedRegion> StdVecSchedReg;
+class MBBRegionsVector : public StdVecSchedReg {
+  bool TopDown;
+  iterator CurrRegion; // Traversal cursor; only meaningful after reset().
+
+public:
+  explicit MBBRegionsVector(bool TD) : TopDown(TD), CurrRegion() {}
+
+  StdVecSchedReg::iterator reset() {
+    if (!TopDown)
+      CurrRegion = begin();
+    else
+      CurrRegion = (empty() ? end() : std::prev(end()));
+    return CurrRegion;
+  }
+
+  /// Going top-down means walking the vector backwards, from back to front.
+  StdVecSchedReg::iterator nextRegion() {
+    if (!TopDown)
+      ++CurrRegion;
+    else
+      CurrRegion = (CurrRegion == begin() ? end() : std::prev(CurrRegion));
+    return CurrRegion;
+  }
+};
+
+/// Split MBB at its scheduling boundaries and append the resulting regions to
+/// Regions, bottom-most region first.
+static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions) {
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  MachineBasicBlock::iterator RegionEnd = MBB->end();
+  while (RegionEnd != MBB->begin()) {
+    // Step above the boundary instruction that closes this region. For the
+    // first region this is only done if the block really ends in a boundary.
+    if (RegionEnd != MBB->end() ||
+        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII))
+      --RegionEnd;
+
+    // Walk upwards to the nearest boundary (or the top of the block),
+    // counting every instruction that is not a debug value on the way.
+    unsigned NumInstrs = 0;
+    MachineBasicBlock::iterator RegionBegin = RegionEnd;
+    while (RegionBegin != MBB->begin()) {
+      MachineInstr &Above = *std::prev(RegionBegin);
+      if (isSchedBoundary(&Above, &*MBB, MF, TII))
+        break;
+      NumInstrs += Above.isDebugValue() ? 0 : 1;
+      --RegionBegin;
+    }
+
+    Regions.push_back(SchedRegion(RegionBegin, RegionEnd, NumInstrs));
+    // The next region, if any, ends where this one begins.
+    RegionEnd = RegionBegin;
+  }
+}
+
 /// Main driver for both MachineScheduler and PostMachineScheduler.
 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                            bool FixKillFlags) {
-  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
   // Visit all machine basic blocks.
   //
   // TODO: Visit blocks in global postorder or postorder within the bottom-up
@@ -472,26 +542,15 @@
     //
     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
     // as a single instruction.
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 
-      // Avoid decrementing RegionEnd for blocks with no terminator.
-      if (RegionEnd != MBB->end() ||
-          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
-        --RegionEnd;
-      }
+    MBBRegionsVector MBBRegions(Scheduler.doMBBSchedRegionsTopDown());
+    getSchedRegions(&*MBB, MBBRegions);
+    for (MBBRegionsVector::iterator R = MBBRegions.reset();
+         R != MBBRegions.end(); R = MBBRegions.nextRegion()) {
+      MachineBasicBlock::iterator I = R->RegionBegin;
+      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
+      unsigned NumRegionInstrs = R->NumRegionInstrs;
 
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      unsigned NumRegionInstrs = 0;
-      MachineBasicBlock::iterator I = RegionEnd;
-      for (; I != MBB->begin(); --I) {
-        MachineInstr &MI = *std::prev(I);
-        if (isSchedBoundary(&MI, &*MBB, MF, TII))
-          break;
-        if (!MI.isDebugValue())
-          ++NumRegionInstrs;
-      }
       // Notify the scheduler of the region, even if we may skip scheduling
       // it. Perhaps it still needs to be bundled.
       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
@@ -517,15 +576,11 @@
       }
 
       // Schedule a region: possibly reorder instructions.
-      // This invalidates 'RegionEnd' and 'I'.
+      // This invalidates the original region iterators.
       Scheduler.schedule();
 
       // Close the current region.
       Scheduler.exitRegion();
-
-      // Scheduling has invalidated the current iterator 'I'. Ask the
-      // scheduler for the top of it's scheduled region.
-      RegionEnd = Scheduler.begin();
     }
     Scheduler.finishBlock();
     // FIXME: Ideally, no further passes should rely on kill flags. However,
@@ -654,6 +709,11 @@
   }
 }
 
+void ScheduleDAGMI::finishBlock() {
+  SchedImpl->leaveMBB(BB); // Tell the strategy that the current MBB is done.
+  ScheduleDAGInstrs::finishBlock(); // Then run the base class handling.
+}
+
 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 /// the region, including the boundary itself and single-instruction regions
Index: lib/Target/SystemZ/SystemZHazardRecognizer.h
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -19,6 +19,13 @@
 // * Processor resources usage. It is beneficial to balance the use of
 // resources.
 //
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with the state
+// of that predecessor (which may be updated by emitting branches).
+//
 // ===---------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
@@ -35,10 +42,10 @@
 
 namespace llvm {
 
-/// SystemZHazardRecognizer maintains the state during scheduling.
+/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
 class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
 
-  ScheduleDAGMI *DAG;
+  const SystemZInstrInfo *TII;
   const TargetSchedModel *SchedModel;
 
   /// Keep track of the number of decoder slots used in the current
@@ -88,18 +95,28 @@
   /// ops, return true if it seems good to schedule an FPd op next.
   bool isFPdOpPreferred_distance(const SUnit *SU);
 
+  /// Wrap a non-scheduled instruction in an SU and emit it.
+  void emitInstruction(MachineInstr *MI);
+
+  /// Last emitted instruction or nullptr.
+  MachineInstr *LastEmittedMI;
+
 public:
-  SystemZHazardRecognizer(const MachineSchedContext *C);
+  SystemZHazardRecognizer(const SystemZInstrInfo *tii,
+                          const TargetSchedModel *SM)
+    : TII(tii), SchedModel(SM) { Reset(); }
 
-  void setDAG(ScheduleDAGMI *dag) {
-    DAG = dag;
-    SchedModel = dag->getSchedModel();
-  }
-  
   HazardType getHazardType(SUnit *m, int Stalls = 0) override;    
   void Reset() override;
   void EmitInstruction(SUnit *SU) override;
 
+  /// Return the scheduling class of SU, resolving and caching it on first use.
+  const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
+    if (!SU->SchedClass && SchedModel->hasInstrSchedModel())
+      SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr());
+    return SU->SchedClass;
+  }
+
   // Cost functions used by SystemZPostRASchedStrategy while
   // evaluating candidates.
 
@@ -121,6 +138,20 @@
   void dumpCurrGroup(std::string Msg = "") const;
   void dumpProcResourceCounters() const;
 #endif
+
+  MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; }
+
+  /// Copy counters from end of single predecessor.
+  void copyCounters(SystemZHazardRecognizer *Incoming);
+
+  /// Update the scheduler state by emitting (non-scheduled) instructions
+  /// from I to NextBegin.
+  void advance(MachineBasicBlock::iterator I,
+               MachineBasicBlock::iterator NextBegin);
+
+  /// Emit a branch in a predecessor, and return true if it is a taken branch
+  /// to (current) MBB.
+  bool emitIncomingBranch(MachineInstr *MI, MachineBasicBlock *MBB);
 };
 
 } // namespace llvm
Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -19,6 +19,13 @@
 // * Processor resources usage. It is beneficial to balance the use of
 // resources.
 //
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with the state
+// of that predecessor (which may be updated by emitting branches).
+//
 // ===---------------------------------------------------------------------===//
 
 #include "SystemZHazardRecognizer.h"
@@ -36,13 +43,9 @@
                                             "resources during scheduling."),
                                    cl::init(8));
 
-SystemZHazardRecognizer::
-SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
-                                                        SchedModel(nullptr) {}
-
 unsigned SystemZHazardRecognizer::
 getNumDecoderSlots(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
 
@@ -73,12 +76,13 @@
   clearProcResCounters();
   GrpCount = 0;
   LastFPdOpCycleIdx = UINT_MAX;
+  LastEmittedMI = nullptr;
   DEBUG(CurGroupDbg = "";);
 }
 
 bool
 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return true;
 
@@ -125,9 +129,9 @@
 #ifndef NDEBUG // Debug output
 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
   OS << "SU(" << SU->NodeNum << "):";
-  OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
+  OS << TII->getName(SU->getInstr()->getOpcode());
 
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return;
   
@@ -203,7 +207,7 @@
 // Update state with SU as the next scheduled unit.
 void SystemZHazardRecognizer::
 EmitInstruction(SUnit *SU) {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   DEBUG( dumpCurrGroup("Decode group before emission"););
 
   // If scheduling an SU that must begin a new decoder group, move on
@@ -218,8 +222,10 @@
            cgd << ", ";
          dumpSU(SU, cgd););
 
+  LastEmittedMI = SU->getInstr();
+
   // After returning from a call, we don't know much about the state.
-  if (SU->getInstr()->isCall()) {
+  if (SU->isCall) {
     DEBUG (dbgs() << "+++ Clearing state after call.\n";);
     clearProcResCounters();
     LastFPdOpCycleIdx = UINT_MAX;
@@ -271,7 +277,7 @@
 }
 
 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0;
   
@@ -315,7 +321,7 @@
 resourcesCost(SUnit *SU) {
   int Cost = 0;
 
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0;
 
@@ -335,3 +341,82 @@
   return Cost;
 }
 
+// Account for an instruction that is not part of any scheduling DAG by
+// wrapping it in a temporary SUnit and emitting that.
+void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI) {
+  SUnit TmpSU(MI, 0);
+
+  // Derive the flags from MI and its scheduling class: isCall,
+  // hasReservedResource (a written resource has BufferSize 0) and
+  // isUnbuffered (a written resource has BufferSize 1).
+  TmpSU.isCall = MI->isCall();
+
+  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
+  auto WriteRes = make_range(SchedModel->getWriteProcResBegin(SC),
+                             SchedModel->getWriteProcResEnd(SC));
+  for (const MCWriteProcResEntry &Entry : WriteRes) {
+    const int BufSize =
+      SchedModel->getProcResource(Entry.ProcResourceIdx)->BufferSize;
+    if (BufSize == 0)
+      TmpSU.hasReservedResource = true;
+    else if (BufSize == 1)
+      TmpSU.isUnbuffered = true;
+  }
+
+  // Hand the temporary unit to the regular emission path.
+  EmitInstruction(&TmpSU);
+}
+
+void SystemZHazardRecognizer::
+copyCounters(SystemZHazardRecognizer *Incoming) {
+  // Take over the counters of Incoming, starting with the decoder group.
+  CurrGroupSize = Incoming->CurrGroupSize;
+  DEBUG (CurGroupDbg = Incoming->CurGroupDbg;);
+
+  // Processor resources
+  ProcResourceCounters = Incoming->ProcResourceCounters;
+  CriticalResourceIdx = Incoming->CriticalResourceIdx;
+
+  // FPd op tracking. Note that LastEmittedMI is not copied.
+  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
+  GrpCount = Incoming->GrpCount;
+}
+
+// Emit the instructions in [I, NextBegin), except positions and debug values.
+void SystemZHazardRecognizer::advance(MachineBasicBlock::iterator I,
+                                      MachineBasicBlock::iterator NextBegin) {
+  while (I != NextBegin) {
+    MachineInstr &MI = *I++;
+    if (!MI.isPosition() && !MI.isDebugValue())
+      emitInstruction(&MI);
+  }
+}
+
+bool SystemZHazardRecognizer::emitIncomingBranch(MachineInstr *MI,
+                                                 MachineBasicBlock *MBB) {
+  DEBUG (dbgs() << "+++ Emitting incoming branch: "; MI->dump(););
+
+  emitInstruction(MI);
+
+  // Be optimistic and assume that branch prediction will generally do "the
+  // right thing".
+
+  if (MI->isBranch() &&
+      (TII->getBranchInfo(*MI).Target->isReg() || // Target in register
+       TII->getBranchInfo(*MI).Target->getMBB() == MBB)) {
+    // Taken branch from predecessor: leave a non-empty decoder group.
+    if (CurrGroupSize > 0)
+      nextGroup(false /*DbgOutput*/);
+    return true;
+  }
+
+  assert ((MI->isBranch() || MI->isReturn() ||
+           MI->getOpcode() == SystemZ::CondTrap) &&
+          "Scheduler: expected a branch or conditional return/trap");
+
+  // NT branches end group after first decoder slot.
+  if (CurrGroupSize == 2)
+    nextGroup(false /*DbgOutput*/);
+
+  return false;
+}
Index: lib/Target/SystemZ/SystemZMachineScheduler.h
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.h
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -11,7 +11,8 @@
 // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
 // the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
 // implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
 //===----------------------------------------------------------------------===//
 
 #include "SystemZHazardRecognizer.h"
@@ -28,7 +29,14 @@
   
 /// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
 class SystemZPostRASchedStrategy : public MachineSchedStrategy {
-  ScheduleDAGMI *DAG;
+
+  const MachineLoopInfo *MLI;
+  const SystemZInstrInfo *TII;
+
+  // A SchedModel is needed before any DAG is built while advancing past
+  // non-scheduled instructions, so it would not always be possible to call
+  // DAG->getSchedClass(SU).
+  TargetSchedModel SchedModel;
   
   /// A candidate during instruction evaluation.
   struct Candidate {
@@ -79,19 +87,53 @@
   /// The set of available SUs to schedule next.
   SUSet Available;
 
-  // HazardRecognizer that tracks the scheduler state for the current
-  // region.
-  SystemZHazardRecognizer HazardRec;
-  
+  /// Current MBB
+  MachineBasicBlock *MBB;
+
+  // Start of current region
+  MachineBasicBlock::iterator CurrBegin;
+
+  /// Maintain hazard recognizers for all blocks, so that the scheduler state
+  /// can be maintained past BB boundaries when appropriate.
+  typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec;
+  MBB2HazRec SchedStates;
+
+  /// Pointer to the HazardRecognizer that tracks the scheduler state for
+  /// the current region.
+  SystemZHazardRecognizer *HazardRec;
+
+  /// A temporary HazardRecognizer used for regions that are separated (by a
+  /// call) from the bottom-most region of the MBB.
+  SystemZHazardRecognizer *TmpHazRec;
+
+  /// Create a HazardRec for each MBB and save it in SchedStates, and set
+  /// HazardRec to point to it.
+  void setupHazardRecForScheduling();
+
+  void transferStateFromPred();
+
 public:
   SystemZPostRASchedStrategy(const MachineSchedContext *C);
+  virtual ~SystemZPostRASchedStrategy();
+
+  /// Called for a region before scheduling.
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override;
 
   /// PostRA scheduling does not track pressure.
   bool shouldTrackPressure() const override { return false; }
 
+  // Process scheduling regions top-down so that scheduler states can be
+  // transferred over scheduling boundaries.
+  bool doMBBSchedRegionsTopDown() const override { return true; }
+
   /// Initialize the strategy after building the DAG for a new region.
   void initialize(ScheduleDAGMI *dag) override;
 
+  /// Tell the strategy that current MBB is done.
+  void leaveMBB(MachineBasicBlock *DoneMB) override;
+
   /// Pick the next node to schedule, or return NULL.
   SUnit *pickNode(bool &IsTopNode) override;
 
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -11,7 +11,8 @@
 // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
 // the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
 // implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
 //===----------------------------------------------------------------------===//
 
 #include "SystemZMachineScheduler.h"
@@ -34,14 +35,126 @@
 }
 #endif
 
+// Return the single predecessor of MBB whose scheduler state is of interest
+// for the top-most region of MBB, or nullptr if there is none.
+static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
+                                             const MachineLoop *Loop) {
+  MachineBasicBlock *Candidate = nullptr;
+  if (MBB->pred_size() == 1)
+    Candidate = *MBB->pred_begin();
+
+  // A loop header with two predecessors: take the one inside the loop (the
+  // latch), unless that is MBB itself, i.e. this is a single block loop.
+  const bool IsHeader = (Loop != nullptr && Loop->getHeader() == MBB);
+  if (IsHeader && MBB->pred_size() == 2)
+    for (auto PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI)
+      if (Loop->contains(*PI))
+        Candidate = (*PI != MBB ? *PI : nullptr);
+
+  assert ((!Candidate || !Loop || Loop->contains(Candidate))
+          && "Loop MBB should not consider predecessor outside of loop.");
+
+  return Candidate;
+}
+
+void SystemZPostRASchedStrategy::setupHazardRecForScheduling() {
+  // Since the MBB regions are traversed top-down, we simply create a new
+  // HazardRecognizer the first time (topmost), and then reuse it next time.
+  if (SchedStates.find(MBB) == SchedStates.end()) {
+    DEBUG (dbgs() << "+++ Entering MBB#" << MBB->getNumber());
+    SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
+  } else
+    DEBUG (dbgs() << "+++ Continuing in MBB#" << MBB->getNumber());
+
+  HazardRec = SchedStates[MBB];
+
+  DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB);
+         if(Loop && Loop->getHeader() == MBB)
+           dbgs() << " (Loop header)";
+         dbgs() << ":\n";);
+}
+
+// Continue from the scheduler state at the end of the single interesting
+// predecessor of MBB, if one exists and has already been processed.
+void SystemZPostRASchedStrategy::transferStateFromPred() {
+  MachineBasicBlock *Pred = getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
+  if (Pred == nullptr || SchedStates.find(Pred) == SchedStates.end())
+    return;
+
+  DEBUG (dbgs() << "+++ Continued scheduling from MBB#"
+         << Pred->getNumber() << "\n";);
+
+  SystemZHazardRecognizer *State = SchedStates[MBB];
+  State->copyCounters(SchedStates[Pred]);
+
+  // Account for the terminators of Pred, stopping after a taken branch to MBB.
+  MachineBasicBlock::iterator Term = Pred->getFirstTerminator();
+  while (Term != Pred->end() && !State->emitIncomingBranch(&*Term, MBB))
+    ++Term;
+}
+
+void SystemZPostRASchedStrategy::leaveMBB(MachineBasicBlock *DoneMBB) {
+  MBB = DoneMBB; // (MBB may not have been set in case of an empty MBB)
+  DEBUG (dbgs() << "+++ Leaving MBB#" << MBB->getNumber() << "\n";);
+
+  MachineBasicBlock::iterator I;
+  if (SchedStates.find(MBB) == SchedStates.end()) {
+    // No scheduling done. Take state from predecessor if possible and then
+    // emit all instructions.
+    SchedStates[MBB] =
+      new SystemZHazardRecognizer(TII, &SchedModel);
+    transferStateFromPred();
+    I = MBB->begin();
+  } else
+    // If scheduling was done, emit everything after the region.
+    I = std::next(SchedStates[MBB]->getLastEmittedMI());
+
+  // Advance to the first terminator. The successor block will handle the
+  // terminators depending on the CFG layout (T/NT branch etc).
+  SchedStates[MBB]->advance(I, MBB->getFirstTerminator());
+}
+
 SystemZPostRASchedStrategy::
 SystemZPostRASchedStrategy(const MachineSchedContext *C)
-  : DAG(nullptr), HazardRec(C) {}
+  : MLI(C->MLI),
+    TII(static_cast<const SystemZInstrInfo *>
+        (C->MF->getSubtarget().getInstrInfo())), 
+    MBB(nullptr), CurrBegin(nullptr), HazardRec(nullptr) {
+  const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
+  SchedModel.init(ST->getSchedModel(), ST, TII);
+  TmpHazRec = new SystemZHazardRecognizer(TII, &SchedModel);
+}
+
+// Release everything this strategy allocated with new.
+SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
+  // First the temporary recognizer created by the constructor ...
+  delete TmpHazRec;
+  // ... then the per-MBB recognizers stored in SchedStates.
+  for (const auto &Entry : SchedStates)
+    delete Entry.second;
+}
+
+void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
+                                            MachineBasicBlock::iterator End,
+                                            unsigned NumRegionInstrs) {
+  MBB = Begin->getParent(); // Remember the block that holds this region.
+  CurrBegin = Begin;        // Region start. End/NumRegionInstrs are unused.
+}
 
 void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
-  DAG = dag;
-  HazardRec.setDAG(dag);
-  HazardRec.Reset();
+  setupHazardRecForScheduling();
+
+  MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
+  MachineBasicBlock::iterator PreRegBegin =
+    (LastEmittedMI != nullptr ? std::next(LastEmittedMI) : MBB->begin());
+
+  // If this is top-most in MBB, try to take over the state from a single
+  // predecessor, if it has been scheduled.
+  if (PreRegBegin == MBB->begin())
+    transferStateFromPred();
+
+  // Emit any instructions before start of region.
+  HazardRec->advance(PreRegBegin, CurrBegin);
 }
 
 // Pick the next node to schedule.
@@ -55,25 +168,25 @@
   // If only one choice, return it.
   if (Available.size() == 1) {
     DEBUG (dbgs() << "+++ Only one: ";
-           HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+           HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
     return *Available.begin();
   }
 
   // All nodes that are possible to schedule are stored by in the
   // Available set.
-  DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec););
+  DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec););
 
   Candidate Best;
   for (auto *SU : Available) {
 
     // SU is the next candidate to be compared against current Best.
-    Candidate c(SU, HazardRec);
+    Candidate c(SU, *HazardRec);
 
     // Remeber which SU is the best candidate.
     if (Best.SU == nullptr || c < Best) {
       Best = c;
       DEBUG(dbgs() << "+++ Best sofar: ";
-            HazardRec.dumpSU(Best.SU, dbgs());
+            HazardRec->dumpSU(Best.SU, dbgs());
             if (Best.GroupingCost != 0)
               dbgs() << "\tGrouping cost:" << Best.GroupingCost;
             if (Best.ResourcesCost != 0)
@@ -138,13 +251,13 @@
 
   // Remove SU from Available set and update HazardRec.
   Available.erase(SU);
-  HazardRec.EmitInstruction(SU);
+  HazardRec->EmitInstruction(SU);
 }
 
 void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
   // Set isScheduleHigh flag on all SUs that we want to consider first in
   // pickNode().
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
   bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
   SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
 
Index: test/CodeGen/SystemZ/int-cmp-48.ll
===================================================================
--- test/CodeGen/SystemZ/int-cmp-48.ll
+++ test/CodeGen/SystemZ/int-cmp-48.ll
@@ -29,8 +29,8 @@
 define void @f2(i8 *%src) {
 ; CHECK-LABEL: f2:
 ; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
-; CHECK: tmll [[REG]], 1
-; CHECK: mvi 0(%r2), 0
+; CHECK-DAG: mvi 0(%r2), 0
+; CHECK-DAG: tmll [[REG]], 1
 ; CHECK: ber %r14
 ; CHECK: br %r14
 entry: