Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -405,6 +405,7 @@
 
   // Initialize the context of the pass.
   MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
   PassConfig = &getAnalysis<TargetPassConfig>();
 
   if (VerifyScheduling)
Index: lib/Target/SystemZ/SystemZHazardRecognizer.h
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -19,6 +19,13 @@
 // * Processor resources usage. It is beneficial to balance the use of
 // resources.
 //
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
 // ===---------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
@@ -35,11 +42,24 @@
 
 namespace llvm {
 
-/// SystemZHazardRecognizer maintains the state during scheduling.
+class SystemZHazardRecognizer;
+typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec;
+
+/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
 class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
 
-  ScheduleDAGMI *DAG;
-  const TargetSchedModel *SchedModel;
+  // A pointer to the global map of scheduler states.
+  MBB2HazRec *SchedStates;
+
+  const SystemZInstrInfo *TII;
+  TargetSchedModel &SchedModel;
+
+  // MBB and Loop that this HazardRecognizer will operate in.
+  MachineBasicBlock *MBB;
+  const MachineLoop *Loop;
+
+  // Start of region
+  MachineBasicBlock::iterator Begin;
 
   /// Keep track of the number of decoder slots used in the current
   /// decoder group.
@@ -88,18 +108,51 @@
   /// ops, return true if it seems good to schedule an FPd op next.
   bool isFPdOpPreferred_distance(const SUnit *SU);
 
+  /// Last emitted instruction or nullptr.
+  MachineInstr *LastEmittedMI;
+
+  /// Wrap a non-scheduled instruction in an SU and emit it.
+  void emitInstruction(MachineInstr *MI);
+
+  /// Update the scheduler state by emitting (non-scheduled) instructions
+  /// from I to NextBegin.
+  void advance(MachineBasicBlock::iterator I,
+               MachineBasicBlock::iterator NextBegin);
+
+  /// Emit a branch in a predecessor, and return true if it is a taken branch
+  /// to MBB.
+  bool emitIncomingBranch(MachineInstr *MI);
+
+  /// Take over state and continue scheduling from end of single predecessor.
+  void takeStateFromPred();
+
+  /// LastCall is used as a compile time optimization in leaveMBB() so that
+  /// in the case of a call in MBB, the final state is achieved by looking at
+  /// just instructions after it.
+  MachineBasicBlock::iterator LastCall;
+
 public:
-  SystemZHazardRecognizer(const MachineSchedContext *C);
+  SystemZHazardRecognizer(const TargetSubtargetInfo *ST, MBB2HazRec *SchedS_,
+                          TargetSchedModel &SM);
+
+  void enterRegion(MachineBasicBlock *MBB_,
+                   const MachineLoop *Loop_,
+                   MachineBasicBlock::iterator Begin_);
+
+  /// Called just before scheduling begins, with the DAG.
+  void initialize();
 
-  void setDAG(ScheduleDAGMI *dag) {
-    DAG = dag;
-    SchedModel = dag->getSchedModel();
-  }
-  
   HazardType getHazardType(SUnit *m, int Stalls = 0) override;    
   void Reset() override;
   void EmitInstruction(SUnit *SU) override;
 
+  /// Resolve the scheduling class for an SUnit and cache it in SU->SchedClass.
+  const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
+    if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
+      SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
+    return SU->SchedClass;
+  }
+
   // Cost functions used by SystemZPostRASchedStrategy while
   // evaluating candidates.
 
@@ -121,6 +174,17 @@
   void dumpCurrGroup(std::string Msg = "") const;
   void dumpProcResourceCounters() const;
 #endif
+
+  /// Remember the last (bottom-most) call in MBB. This is the Call first
+  /// passed here, since regions are bottom-up in the MBB.
+  void setLastCall(MachineBasicBlock::iterator Call) {
+    if (LastCall == nullptr)
+      LastCall = Call;
+  }
+
+  /// Leave MBB after scheduling is done. This means emitting all instructions
+  /// up to but not including the first terminator.
+  void leaveMBB();
 };
 
 } // namespace llvm
Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -19,6 +19,13 @@
 // * Processor resources usage. It is beneficial to balance the use of
 // resources.
 //
+// A goal is to consider all instructions, also those outside of any
+// scheduling region. Such instructions are "advanced" past and include
+// single instructions before a scheduling region, branches etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
 // ===---------------------------------------------------------------------===//
 
 #include "SystemZHazardRecognizer.h"
@@ -37,12 +44,45 @@
                                    cl::init(8));
 
 SystemZHazardRecognizer::
-SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
-                                                        SchedModel(nullptr) {}
+SystemZHazardRecognizer(const TargetSubtargetInfo *ST, MBB2HazRec *SchedS_,
+                        TargetSchedModel &SM)
+  : SchedStates(SchedS_),
+    TII(static_cast<const SystemZInstrInfo *>(ST->getInstrInfo())),
+    SchedModel(SM), MBB(nullptr), Loop(nullptr), Begin(nullptr),
+    CurrGroupSize(0), LastFPdOpCycleIdx(UINT_MAX), GrpCount(0),
+    LastEmittedMI(nullptr), LastCall(nullptr) {
+  // The resource counters are first set up by Reset(), see enterRegion().
+}
+
+void SystemZHazardRecognizer::enterRegion(MachineBasicBlock *MBB_,
+                                          const MachineLoop *Loop_,
+                                          MachineBasicBlock::iterator Begin_) {
+  Reset(); // Also clears MBB, Loop and Begin, so it has to come first.
+  MBB = MBB_;
+  Loop = Loop_;
+  Begin = Begin_;
+}
+
+void SystemZHazardRecognizer::initialize() {
+  // Instructions above Begin that are not part of the region still affect
+  // the state. Walk upwards to find where they start: either right below
+  // the closest call, or at the top of MBB.
+  MachineBasicBlock::iterator First = Begin;
+  while (First != MBB->begin() && !std::prev(First)->isCall())
+    --First;
+
+  // No call lies between the top of MBB and this region, so the state of a
+  // single predecessor (if there is one) can be continued from.
+  const bool TopMostInMBB = (First == MBB->begin());
+  if (TopMostInMBB)
+    takeStateFromPred();
+
+  // Bring the state up to date with everything in [First, Begin).
+  advance(First, Begin);
+}
 
 unsigned SystemZHazardRecognizer::
 getNumDecoderSlots(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
 
@@ -69,16 +109,21 @@
 }
 
 void SystemZHazardRecognizer::Reset() {
+  MBB = nullptr;
+  Loop = nullptr;
+  Begin = nullptr;
   CurrGroupSize = 0;
   clearProcResCounters();
   GrpCount = 0;
   LastFPdOpCycleIdx = UINT_MAX;
+  LastEmittedMI = nullptr;
+  LastCall = nullptr;
   DEBUG(CurGroupDbg = "";);
 }
 
 bool
 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return true;
 
@@ -107,7 +152,7 @@
     CurrGroupSize = 0;
 
     // Decrease counters for execution units by one.
-    for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+    for (unsigned i = 0; i < SchedModel.getNumProcResourceKinds(); ++i)
       if (ProcResourceCounters[i] > 0)
         ProcResourceCounters[i]--;
 
@@ -125,17 +170,17 @@
 #ifndef NDEBUG // Debug output
 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
   OS << "SU(" << SU->NodeNum << "):";
-  OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
+  OS << TII->getName(SU->getInstr()->getOpcode());
 
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return;
   
   for (TargetSchedModel::ProcResIter
-         PI = SchedModel->getWriteProcResBegin(SC),
-         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+         PI = SchedModel.getWriteProcResBegin(SC),
+         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
     const MCProcResourceDesc &PRD =
-      *SchedModel->getProcResource(PI->ProcResourceIdx);
+      *SchedModel.getProcResource(PI->ProcResourceIdx);
     std::string FU(PRD.Name);
     // trim e.g. Z13_FXaUnit -> FXa
     FU = FU.substr(FU.find("_") + 1);
@@ -175,7 +220,7 @@
 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
   bool any = false;
 
-  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+  for (unsigned i = 0; i < SchedModel.getNumProcResourceKinds(); ++i)
     if (ProcResourceCounters[i] > 0) {
       any = true;
       break;
@@ -185,10 +230,10 @@
     return;
 
   dbgs() << "+++ Resource counters:\n";
-  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+  for (unsigned i = 0; i < SchedModel.getNumProcResourceKinds(); ++i)
     if (ProcResourceCounters[i] > 0) {
       dbgs() << "+++ Extra schedule for execution unit "
-             << SchedModel->getProcResource(i)->Name
+             << SchedModel.getProcResource(i)->Name
              << ": " << ProcResourceCounters[i] << "\n";
       any = true;
     }
@@ -196,14 +241,14 @@
 #endif //NDEBUG
 
 void SystemZHazardRecognizer::clearProcResCounters() {
-  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
+  ProcResourceCounters.assign(SchedModel.getNumProcResourceKinds(), 0);
   CriticalResourceIdx = UINT_MAX;
 }
 
 // Update state with SU as the next scheduled unit.
 void SystemZHazardRecognizer::
 EmitInstruction(SUnit *SU) {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   DEBUG( dumpCurrGroup("Decode group before emission"););
 
   // If scheduling an SU that must begin a new decoder group, move on
@@ -218,8 +263,10 @@
            cgd << ", ";
          dumpSU(SU, cgd););
 
+  LastEmittedMI = SU->getInstr();
+
   // After returning from a call, we don't know much about the state.
-  if (SU->getInstr()->isCall()) {
+  if (SU->isCall) {
     DEBUG (dbgs() << "+++ Clearing state after call.\n";);
     clearProcResCounters();
     LastFPdOpCycleIdx = UINT_MAX;
@@ -231,10 +278,10 @@
 
   // Increase counter for execution unit(s).
   for (TargetSchedModel::ProcResIter
-         PI = SchedModel->getWriteProcResBegin(SC),
-         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+         PI = SchedModel.getWriteProcResBegin(SC),
+         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
     // Don't handle FPd together with the other resources.
-    if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
+    if (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
       continue;
     int &CurrCounter =
       ProcResourceCounters[PI->ProcResourceIdx];
@@ -246,7 +293,7 @@
           CurrCounter >
           ProcResourceCounters[CriticalResourceIdx]))) {
       DEBUG( dbgs() << "+++ New critical resource: "
-             << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
+             << SchedModel.getProcResource(PI->ProcResourceIdx)->Name
              << "\n";);
       CriticalResourceIdx = PI->ProcResourceIdx;
     }
@@ -271,7 +318,7 @@
 }
 
 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0;
   
@@ -315,7 +362,7 @@
 resourcesCost(SUnit *SU) {
   int Cost = 0;
 
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = getSchedClass(SU);
   if (!SC->isValid())
     return 0;
 
@@ -326,8 +373,8 @@
   // For other instructions, give a cost to the use of the critical resource.
   else if (CriticalResourceIdx != UINT_MAX) {
     for (TargetSchedModel::ProcResIter
-           PI = SchedModel->getWriteProcResBegin(SC),
-           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
+           PI = SchedModel.getWriteProcResBegin(SC),
+           PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI)
       if (PI->ProcResourceIdx == CriticalResourceIdx)
         Cost = PI->Cycles;
   }
@@ -335,3 +382,139 @@
   return Cost;
 }
 
+// Update the state with MI, a non-scheduled instruction, by handing it to
+// EmitInstruction() wrapped in a temporary SUnit.
+void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI) {
+  SUnit TmpSU(MI, 0);
+
+  // Set interesting flags.
+  TmpSU.isCall = MI->isCall();
+
+  // The buffer size of each processor resource written by MI decides the
+  // resource flags: 0 means a reserved resource, 1 an unbuffered one.
+  const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel.getWriteProcResBegin(SC),
+         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+    const auto BufferSize =
+      SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize;
+    if (BufferSize == 0)
+      TmpSU.hasReservedResource = true;
+    else if (BufferSize == 1)
+      TmpSU.isUnbuffered = true;
+  }
+
+  // Let the regular emission path update the state.
+  EmitInstruction(&TmpSU);
+}
+
+// Emit the instructions in [I, NextBegin), except positions and debug values.
+void SystemZHazardRecognizer::advance(MachineBasicBlock::iterator I,
+                                      MachineBasicBlock::iterator NextBegin) {
+  for (; I != NextBegin; ++I) {
+    const bool Skipped = I->isPosition() || I->isDebugValue();
+    if (!Skipped)
+      emitInstruction(&*I);
+  }
+}
+
+bool SystemZHazardRecognizer::emitIncomingBranch(MachineInstr *MI) {
+  DEBUG (dbgs() << "+++ Emitting incoming branch: "; MI->dump(););
+
+  emitInstruction(MI);
+
+  // A branch with a register target, or one that targets MBB, is treated as
+  // taken into MBB: a non-empty current decoder group is ended.
+  if (MI->isBranch()) {
+    const auto *Target = TII->getBranchInfo(*MI).Target;
+    if (Target->isReg() || Target->getMBB() == MBB) {
+      if (CurrGroupSize > 0)
+        nextGroup(false /*DbgOutput*/);
+      return true;
+    }
+  }
+
+  assert ((MI->isBranch() || MI->isReturn() ||
+           MI->getOpcode() == SystemZ::CondTrap) &&
+          "Scheduler: expected a branch or conditional return/trap");
+  // NT branches end group after first decoder slot.
+  if (CurrGroupSize == 2)
+    nextGroup(false /*DbgOutput*/);
+  return false;
+}
+
+// Return the one predecessor whose scheduler state is interesting for the
+// top-most region of MBB, or nullptr if there is no such predecessor.
+static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
+                                             const MachineLoop *Loop) {
+  if (MBB->pred_size() == 1)
+    return *MBB->pred_begin();
+
+  // Of the two predecessors of a loop header, pick the one inside the loop
+  // (the latch), unless that is MBB itself (single block loop).
+  const bool IsHeader = (Loop != nullptr && Loop->getHeader() == MBB);
+  if (MBB->pred_size() != 2 || !IsHeader)
+    return nullptr;
+  for (MachineBasicBlock *Pred : MBB->predecessors())
+    if (Loop->contains(Pred))
+      return (Pred == MBB ? nullptr : Pred);
+  return nullptr;
+}
+
+void SystemZHazardRecognizer::takeStateFromPred() {
+  // Only continue from a single predecessor that has already been scheduled,
+  // i.e. one that has an entry in SchedStates.
+  MachineBasicBlock *PredMBB = getSingleSchedPred(MBB, Loop);
+  assert (!PredMBB || !Loop || Loop->contains(PredMBB));
+  if (PredMBB == nullptr)
+    return;
+  const auto PredState = SchedStates->find(PredMBB);
+  if (PredState == SchedStates->end())
+    return;
+
+  // Be optimistic and assume that branch prediction will generally do "the
+  // right thing", and continue from the state the predecessor was left in.
+  SystemZHazardRecognizer *PredHazRec = PredState->second;
+  DEBUG (dbgs() << "+++ Continued scheduling from MBB#"
+         << PredHazRec->MBB->getNumber() << "\n";);
+
+  // Current decoder group
+  CurrGroupSize = PredHazRec->CurrGroupSize;
+  DEBUG (CurGroupDbg = PredHazRec->CurGroupDbg;);
+
+  // Processor resources
+  ProcResourceCounters = PredHazRec->ProcResourceCounters;
+  CriticalResourceIdx = PredHazRec->CriticalResourceIdx;
+
+  // FPd
+  LastFPdOpCycleIdx = PredHazRec->LastFPdOpCycleIdx;
+  GrpCount = PredHazRec->GrpCount;
+
+  // Emit the predecessor's terminators, stopping at a taken branch to MBB.
+  for (MachineBasicBlock::iterator I = PredHazRec->MBB->getFirstTerminator(),
+         E = PredHazRec->MBB->end(); I != E; ++I)
+    if (emitIncomingBranch(&*I))
+      break;
+}
+
+void SystemZHazardRecognizer::leaveMBB() {
+  MachineBasicBlock::iterator I;
+
+  if (LastEmittedMI != nullptr)
+    // If scheduling was done, emit everything after the region.
+    I = std::next(MachineBasicBlock::iterator(LastEmittedMI));
+  else if (LastCall != nullptr && !LastCall->isTerminator())
+    // Otherwise, emit everything after the last call in MBB. A call that is
+    // a terminator is not used: advance() would start past its end point.
+    I = std::next(MachineBasicBlock::iterator(LastCall));
+  else {
+    // Get the correct final state by emitting the whole MBB.
+    takeStateFromPred();
+    I = MBB->begin();
+  }
+
+  // Advance to first terminator. The successor block will handle them in
+  // takeStateFromPred().
+  advance(I, MBB->getFirstTerminator());
+}
+
Index: lib/Target/SystemZ/SystemZMachineScheduler.h
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.h
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -11,8 +11,9 @@
 // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
 // the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
 // implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
-//===----------------------------------------------------------------------===//
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
+// ===----------------------------------------------------------------------===//
 
 #include "SystemZHazardRecognizer.h"
 #include "llvm/CodeGen/MachineScheduler.h"
@@ -28,7 +29,11 @@
   
 /// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
 class SystemZPostRASchedStrategy : public MachineSchedStrategy {
-  ScheduleDAGMI *DAG;
+
+  // A SchedModel is needed before any DAG is built while advancing past
+  // non-scheduled instructions, so it would not always be possible to call
+  // DAG->getSchedClass(SU).
+  TargetSchedModel SchedModel;
   
   /// A candidate during instruction evaluation.
   struct Candidate {
@@ -79,12 +84,35 @@
   /// The set of available SUs to schedule next.
   SUSet Available;
 
-  // HazardRecognizer that tracks the scheduler state for the current
-  // region.
-  SystemZHazardRecognizer HazardRec;
-  
+  /// Maintain hazard recognizers for all blocks, so that the scheduler state
+  /// can be maintained past BB boundaries when appropriate.
+  MBB2HazRec SchedStates;
+
+  /// Pointer to the HazardRecognizer that tracks the scheduler state for
+  /// the current region.
+  SystemZHazardRecognizer *HazardRec;
+
+  /// A temporary HazardRecognizer used for regions that are separated (by a
+  /// call) from the bottom-most region of the MBB.
+  SystemZHazardRecognizer TmpHazRec;
+
+  /// Since there is no virtual leaveRegion() method, use a pointer to check
+  /// when scheduler has changed MBB, so that leaveMBB() can be called for
+  /// the previous MBB.
+  MachineBasicBlock *PreviouslyVisitedMBB;
+
+  /// Loops are checked so that headers can be identified in
+  /// takeStateFromPred().
+  const MachineLoopInfo *MLI;
+
 public:
   SystemZPostRASchedStrategy(const MachineSchedContext *C);
+  virtual ~SystemZPostRASchedStrategy();
+
+  /// Called for a region before scheduling.
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override;
 
   /// PostRA scheduling does not track pressure.
   bool shouldTrackPressure() const override { return false; }
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -11,8 +11,9 @@
 // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
 // the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
 // implementation that looks to optimize decoder grouping and balance the
-// usage of processor resources.
-//===----------------------------------------------------------------------===//
+// usage of processor resources. Scheduler states are saved for the end
+// region of each MBB, so that a successor block can learn from it.
+// ===----------------------------------------------------------------------===//
 
 #include "SystemZMachineScheduler.h"
 
@@ -36,12 +37,62 @@
 
 SystemZPostRASchedStrategy::
 SystemZPostRASchedStrategy(const MachineSchedContext *C)
-  : DAG(nullptr), HazardRec(C) {}
+  : HazardRec(nullptr),
+    TmpHazRec(&C->MF->getSubtarget(), &SchedStates, SchedModel),
+    PreviouslyVisitedMBB(nullptr), MLI(C->MLI) {
+  // TmpHazRec only binds a reference to SchedModel, which is set up here.
+  const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
+  SchedModel.init(ST->getSchedModel(), ST,
+                  static_cast<const SystemZInstrInfo *>(ST->getInstrInfo()));
+}
+
+SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
+  // SchedStates holds the hazard recognizers that initPolicy() allocated
+  // with new, one per visited MBB. Free them here. TmpHazRec is a plain
+  // member and is never put in the map.
+  for (const auto &State : SchedStates)
+    delete State.second;
+}
+
+void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
+                                            MachineBasicBlock::iterator End,
+                                            unsigned NumRegionInstrs) {
+  MachineBasicBlock *MBB = Begin->getParent();
+  const MachineLoop *Loop = MLI->getLoopFor(MBB);
+
+  // If a new MBB has been entered, finalize the previous MBB.
+  if (PreviouslyVisitedMBB != nullptr && PreviouslyVisitedMBB != MBB)
+    SchedStates.find(PreviouslyVisitedMBB)->second->leaveMBB();
+  PreviouslyVisitedMBB = MBB;
+
+  // We can maintain the scheduler state perfectly even when scheduling regions
+  // in reverse order (bottom-up) in MBB, because the only scheduling
+  // boundaries we have are calls, which simply reset the state.
+  if (SchedStates.find(MBB) != SchedStates.end()) {
+    // Use the temporary HazardRecognizer for any regions above calls.
+    DEBUG (dbgs() << "+++ Continuing in MBB#" << MBB->getNumber());
+    HazardRec = &TmpHazRec;
+  } else {
+    // First (bottom-most) region of MBB: make and save a new HazardRecognizer.
+    DEBUG (dbgs() << "+++ Entering MBB#" << MBB->getNumber());
+    HazardRec = new SystemZHazardRecognizer(&MBB->getParent()->getSubtarget(),
+                                            &SchedStates, SchedModel);
+    SchedStates[MBB] = HazardRec;
+  }
+
+  DEBUG (if(Loop && Loop->getHeader() == MBB)
+           dbgs() << " (Loop header)";
+         dbgs() << ":\n";);
+
+  // Record the call only after enterRegion(), whose Reset() clears LastCall.
+  // Skip terminator calls: leaveMBB() stops at the first terminator.
+  HazardRec->enterRegion(MBB, Loop, Begin);
+  if (End != MBB->end() && End->isCall() && !End->isTerminator())
+    SchedStates[MBB]->setLastCall(End);
+}
 
 void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
-  DAG = dag;
-  HazardRec.setDAG(dag);
-  HazardRec.Reset();
+  HazardRec->initialize();
 }
 
 // Pick the next node to schedule.
@@ -55,25 +106,25 @@
   // If only one choice, return it.
   if (Available.size() == 1) {
     DEBUG (dbgs() << "+++ Only one: ";
-           HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+           HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
     return *Available.begin();
   }
 
   // All nodes that are possible to schedule are stored by in the
   // Available set.
-  DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec););
+  DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec););
 
   Candidate Best;
   for (auto *SU : Available) {
 
     // SU is the next candidate to be compared against current Best.
-    Candidate c(SU, HazardRec);
+    Candidate c(SU, *HazardRec);
 
     // Remeber which SU is the best candidate.
     if (Best.SU == nullptr || c < Best) {
       Best = c;
       DEBUG(dbgs() << "+++ Best sofar: ";
-            HazardRec.dumpSU(Best.SU, dbgs());
+            HazardRec->dumpSU(Best.SU, dbgs());
             if (Best.GroupingCost != 0)
               dbgs() << "\tGrouping cost:" << Best.GroupingCost;
             if (Best.ResourcesCost != 0)
@@ -138,13 +189,13 @@
 
   // Remove SU from Available set and update HazardRec.
   Available.erase(SU);
-  HazardRec.EmitInstruction(SU);
+  HazardRec->EmitInstruction(SU);
 }
 
 void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
   // Set isScheduleHigh flag on all SUs that we want to consider first in
   // pickNode().
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  const MCSchedClassDesc *SC = TmpHazRec.getSchedClass(SU);
   bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
   SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
 
Index: test/CodeGen/SystemZ/int-cmp-48.ll
===================================================================
--- test/CodeGen/SystemZ/int-cmp-48.ll
+++ test/CodeGen/SystemZ/int-cmp-48.ll
@@ -29,8 +29,8 @@
 define void @f2(i8 *%src) {
 ; CHECK-LABEL: f2:
 ; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
-; CHECK: tmll [[REG]], 1
-; CHECK: mvi 0(%r2), 0
+; CHECK-DAG: mvi 0(%r2), 0
+; CHECK-DAG: tmll [[REG]], 1
 ; CHECK: ber %r14
 ; CHECK: br %r14
 entry: