Index: include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- include/llvm/CodeGen/MachineScheduler.h
+++ include/llvm/CodeGen/MachineScheduler.h
@@ -163,8 +163,12 @@
   // first.
   bool DisableLatencyHeuristic;
 
+  // If true, try to use instructions which can fold a reload of a reg.
+  bool FoldableReloadHeuristic;
+
   MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false),
-    OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false) {}
+    OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false),
+    FoldableReloadHeuristic(false) {}
 };
 
 /// MachineSchedStrategy - Interface to the scheduling algorithm used by
@@ -196,6 +200,9 @@
   /// Initialize the strategy after building the DAG for a new region.
   virtual void initialize(ScheduleDAGMI *DAG) = 0;
 
+  /// Called when a region is done so the strategy can, e.g., write stats.
+  virtual void leaveRegion() {}
+
   /// Notify this strategy that all roots have been released (including those
   /// that depend on EntrySU or ExitSU).
   virtual void registerRoots() {}
@@ -769,7 +776,7 @@
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
     NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
-    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+    ResourceReduce, ResourceDemand, FoldReload, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -921,6 +921,13 @@
   }
 
 public:
+  /// Return true if MI has an equivalent form reading one source reg from
+  /// memory. If reg is nonzero, reg must be used in the foldable operand.
+  virtual bool hasFoldableOperand(const MachineInstr *MI,
+                                  unsigned reg = 0) const {
+    return false;
+  }
+
   /// unfoldMemoryOperand - Separate a single instruction which folded a load or
   /// a store or a load and a store into two or more instruction. If this is
   /// possible, returns true as well as the new instructions by reference.
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -131,6 +131,7 @@
 VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
   MachineRegisterInfo &mri = MF.getRegInfo();
   const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
+  const TargetInstrInfo &tii = *MF.getSubtarget().getInstrInfo();
   MachineBasicBlock *mbb = nullptr;
   MachineLoop *loop = nullptr;
   bool isExiting = false;
@@ -170,6 +171,11 @@
       // Calculate instr weight.
       bool reads, writes;
       std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+      // If mi can be transformed to fold a reload of li.reg, then
+      // weight for reading becomes 0.
+      if (reads && tii.hasFoldableOperand(mi, li.reg))
+        reads = 0;
+
       weight = LiveIntervals::getSpillWeight(
         writes, reads, &MBFI, mi);
 
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -191,6 +191,7 @@
   AU.setPreservesCFG();
   AU.addRequiredID(MachineDominatorsID);
   AU.addRequired<MachineLoopInfo>();
+  AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<TargetPassConfig>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -371,6 +372,8 @@
 
   // Initialize the context of the pass.
   MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   PassConfig = &getAnalysis<TargetPassConfig>();
 
   if (VerifyScheduling)
@@ -498,6 +501,7 @@
     }
     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
     Scheduler.finishBlock();
+
     // FIXME: Ideally, no further passes should rely on kill flags. However,
     // thumb2 size reduction is currently an exception, so the PostMIScheduler
     // needs to do this.
@@ -739,6 +743,8 @@
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 
+  SchedImpl->leaveRegion();
+  
   placeDebugValues();
 
   DEBUG({
@@ -2376,6 +2382,7 @@
   case RegMax:         return "REG-MAX   ";
   case ResourceReduce: return "RES-REDUCE";
   case ResourceDemand: return "RES-DEMAND";
+  case FoldReload:     return "FOLDRELOAD";
   case TopDepthReduce: return "TOP-DEPTH ";
   case TopPathReduce:  return "TOP-PATH  ";
   case BotHeightReduce:return "BOT-HEIGHT";
@@ -2500,6 +2507,26 @@
   return false;
 }
 
+/// Compare TryCand and Cand on whether their instructions have a foldable
+/// operand according to TII->hasFoldableOperand(). In the top zone the two
+/// flags are compared with tryLess(), in the bottom zone with tryGreater(),
+/// using the FoldReload reason. The result of that comparison is returned.
+static bool tryFoldableReload(GenericSchedulerBase::SchedCandidate &TryCand,
+                              GenericSchedulerBase::SchedCandidate &Cand,
+                              SchedBoundary &Zone,
+                              const TargetInstrInfo *TII) {
+  const bool CandFolds = TII->hasFoldableOperand(Cand.SU->getInstr());
+  const bool TryFolds = TII->hasFoldableOperand(TryCand.SU->getInstr());
+
+  // The direction of the comparison depends on the scheduling zone.
+  if (Zone.isTop())
+    return tryLess(TryFolds, CandFolds, TryCand, Cand,
+                   GenericSchedulerBase::FoldReload);
+
+  return tryGreater(TryFolds, CandFolds, TryCand, Cand,
+                    GenericSchedulerBase::FoldReload);
+}
+
 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
                       bool IsTop) {
   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
@@ -2779,7 +2806,6 @@
                                                TryCand, Cand, RegExcess, TRI,
                                                DAG->MF))
     return;
-
   // Avoid increasing the max critical pressure in the scheduled region.
   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
                                                Cand.RPDelta.CriticalMax,
@@ -2834,6 +2860,14 @@
                  TryCand, Cand, ResourceDemand))
     return;
 
+  // Try to put lower in final schedule an instruction that doesn't
+  // mind if a source reg gets spilled, i.e. it can fold a reload of
+  // it. This makes the source operands more likely to be spilled, as
+  // opposed to the def operand.
+  if (RegionPolicy.FoldableReloadHeuristic &&
+      tryFoldableReload(TryCand, Cand, Zone, DAG->TII))
+    return;
+
   // Avoid serializing long latency dependence chains.
   // For acyclic path limited loops, latency was already checked above.
   if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
Index: lib/Target/SystemZ/CMakeLists.txt
===================================================================
--- lib/Target/SystemZ/CMakeLists.txt
+++ lib/Target/SystemZ/CMakeLists.txt
@@ -17,12 +17,14 @@
   SystemZConstantPoolValue.cpp
   SystemZElimCompare.cpp
   SystemZFrameLowering.cpp
+  SystemZHazardRecognizer.cpp
   SystemZISelDAGToDAG.cpp
   SystemZISelLowering.cpp
   SystemZInstrInfo.cpp
   SystemZLDCleanup.cpp
   SystemZLongBranch.cpp
   SystemZMachineFunctionInfo.cpp
+  SystemZMachineScheduler.cpp
   SystemZMCInstLower.cpp
   SystemZRegisterInfo.cpp
   SystemZSelectionDAGInfo.cpp
Index: lib/Target/SystemZ/SystemZ.td
===================================================================
--- lib/Target/SystemZ/SystemZ.td
+++ lib/Target/SystemZ/SystemZ.td
@@ -14,6 +14,11 @@
 include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
+// SystemZ subtargets scheduling models.
+//===----------------------------------------------------------------------===//
+include "SystemZSchedule.td"
+
+//===----------------------------------------------------------------------===//
 // SystemZ supported processors and features
 //===----------------------------------------------------------------------===//
 
Index: lib/Target/SystemZ/SystemZHazardRecognizer.h
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -0,0 +1,161 @@
+//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a hazard recognizer for scheduling of SystemZ
+// functions. The main goal is to optimize decoder grouping.
+//
+// A decoder group can maximally hold 3 uops. Some instructions are
+// expanded to 2 or more uops by the decoder, which means some groups
+// will only contain 1 or 2 instructions.
+//
+// There are also instructions that have dual issue and execute on
+// more than one execution unit, although the decoder only needs one
+// slot for them. Currently, those extra execution units are however
+// not considered. The uops modelled here represent one decoder slot
+// and a usage of one processor resource.
+// ===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+
+/// SystemZDecodeGroupHazardRecognizer reports a hazard for any
+/// instruction that does not fit into current decoder group.
+class SystemZDecodeGroupHazardRecognizer : public ScheduleHazardRecognizer {
+  const ScheduleDAG *DAG;
+  TargetSchedModel SchedModel;
+
+  /// Number of decoder slots used in the current decoder group.
+  unsigned CurGroupSize;
+
+  /// Counters for the number of uops scheduled per processor resource.
+  SmallVector<int, 0> ProcResourceCounters;
+
+  /// Looked-up index of the FPd processor resource (0 if there is none).
+  unsigned FPD_RESOURCE_IDX;
+
+  /// Return MCSchedClassDesc for SU, or nullptr if not available.
+  inline const MCSchedClassDesc *getSchedClassDesc(const SUnit *SU) const {
+    const MCInstrDesc *MCIDesc = DAG->getInstrDesc(SU);
+    if (MCIDesc != nullptr) {
+      unsigned Idx = MCIDesc->getSchedClass();
+      if (Idx)
+        return SchedModel.getMCSchedModel()->getSchedClassDesc(Idx);
+    }
+    return nullptr;
+  }
+
+  /// Return number of uops as defined in .td file.
+  inline unsigned getNumMicroOps(const SUnit *SU) const {
+    const MCSchedClassDesc *SC = getSchedClassDesc(SU);
+    if (SC == nullptr)
+      return 1;
+    unsigned NumUOps = SC->NumMicroOps;
+
+    // If instruction has more than three explicit source registers,
+    // it will limit the individual decode group to 2 uops.
+    if (NumUOps == 1 && hasPlus3Sources(SU))
+      return 2;
+
+    return NumUOps;
+  }
+
+  /// Return true if SU fits into current decoder group.
+  bool fitsIntoCurrentGroup(SUnit *SU) const;
+
+  /// Initialize hazard recognizer before scheduling a region.
+  void init();
+
+  /// Number of decoder groups per cycle (IssueWidth / 3), but at least 1
+  /// since the result is used as a divisor.
+  unsigned numGroupsPerCycle() {
+    unsigned NumGroups = SchedModel.getMCSchedModel()->IssueWidth / 3;
+    return NumGroups ? NumGroups : 1;
+  }
+
+  /// Two multicycle (div/sqrt) BFP operations should preferably not
+  /// be issued to the same processor side, since that will incur a
+  /// stall (blocking execution unit).
+  /// Return true if SU is a BFP multicycle instruction.
+  bool isBFPMultiCycle(const SUnit *SU) const;
+
+  /// True if current group contains a multi cycle op.
+  bool currGroupHasMultiCycleOp;
+
+  /// Return true if the instruction has more than three sources,
+  /// which will limit the group to 2 uops instead of 3.
+  bool hasPlus3Sources(const SUnit *SU) const;
+
+  /// (Experimental) Statistics counters
+  unsigned InsCount;
+  unsigned GrpCount;
+  unsigned MaxHeight;
+  unsigned SPAccesses;
+  unsigned MaxScheduledLatency;
+  unsigned QueuedUnits;
+  unsigned MaxQueued;
+  unsigned Noops;
+  unsigned Stalls;
+  unsigned Groupers;
+
+  // Loop depth is considered for loop-weighted statistics.
+  unsigned LoopDepth;
+  unsigned getLoopWeight() { return (LoopDepth ? LoopDepth * 50 : 1); }
+
+public:
+  SystemZDecodeGroupHazardRecognizer(const ScheduleDAG *DAG_);
+
+  HazardType getHazardType(SUnit *m, int Stalls = 0) override;
+  void Reset() override;
+  void EmitInstruction(SUnit *SU) override;
+  unsigned PreEmitNoops(SUnit *) override;
+
+  /// Start next decoder group.
+  void nextGroup();
+
+  // Cost functions used by SystemZPostRASchedStrategy while
+  // evaluating candidates.
+  bool newGroupAndSUMustBegin(const SUnit *SU) const;
+  bool mustEndSUWouldCompleteGroup(const SUnit *SU) const;
+  int groupingCost(const SUnit *SU) const;
+  bool multiCycleStallInGroup(const SUnit *SU) const;
+  unsigned resourcesCost(const SUnit *SU) const;
+
+#ifndef NDEBUG
+  // Debug dumping.
+  std::string CurGroupDbg; // current group as text
+  void dumpSU(SUnit *SU, raw_ostream &OS) const;
+  void dumpCurrGroup(std::string Msg = "") const;
+  void dumpProcResourceCounters() const;
+#endif
+
+  /// Update statistics after scheduling.
+  void doStats();
+  void resourcesQueued(const SUnit *SU, unsigned &ResQueued,
+                       unsigned &MaxQ) const;
+
+  /// Set loop depth for loop weighted statistics.
+  void setLoopDepth(unsigned d) { LoopDepth = d; }
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -0,0 +1,489 @@
+//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZHazardRecognizer.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <stdio.h>
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// Plenty of (experimental) statistics.
+
+STATISTIC(NumInstrs, "SystemZ: Number of instructions scheduled (all).");
+STATISTIC(NumDecoderGroups, "SystemZ: Number of decoder groups scheduled.");
+
+// Too much parallelism pre-ra will result in spilling.
+STATISTIC(NumSPAccesses, "SystemZ: Number of SP accesses");
+
+// Too little parallelism pre-ra will result in unnecessarily
+// dependent instructions, if the reg-alloc reuses registers. That
+// will make the post-ra DAGs higher.
+STATISTIC(NumDAGHeights, "SystemZ: Total height of all DAGs (post RA)");
+
+// The post-ra measure of the static latency of the scheduled regions.
+// The more priority scheduler puts on latency, the smaller this will be.
+STATISTIC(RegionLatency, "SystemZ: The scheduled latency for regions.");
+
+// How many instructions did the post-ra scheduler think it scheduled
+// when exec units were not believed to be available?
+STATISTIC(ExecUnitsQueues,"SystemZ: FUs queued");
+
+// What did the post-ra scheduler think was the longest exec unit
+// queue for any scheduled instruction?
+STATISTIC(MaxExecUnitQueue, "SystemZ: Sum of longest FU queues in regions");
+
+STATISTIC(NumNoops, "SystemZ: Number of noops inserted");
+STATISTIC(NumStalls, "SystemZ: Number of stall cycles (FPd unit)");
+
+STATISTIC(NumGroupers, "SystemZ: Number of grouping instructions");
+
+// Loop weighted versions. A loop is weighted to (50 * loop-depth).
+STATISTIC(NumDecoderGroupsWeighted,
+          "SystemZ: Number of decoder groups scheduled, Loop Weighted.");
+STATISTIC(NumDAGHeightsWeighted,
+          "SystemZ: Total height of all DAGs (post RA), Loop Weighted.");
+STATISTIC(RegionLatencyWeighted,
+          "SystemZ: The scheduled latency for regions, Loop Weighted.");
+STATISTIC(ExecUnitsQueuesWeighted,
+          "SystemZ: FUs queued, Loop Weighted.");
+STATISTIC(MaxExecUnitQueueWeighted,
+          "SystemZ: Sum of longest FU queues in regions, Loop Weighted.");
+STATISTIC(NumStallsWeighted,
+          "SystemZ: Number of stall cycles (FPd unit), Loop Weighted.");
+STATISTIC(NumGroupersWeighted,
+          "SystemZ: Number of grouping instructions, Loop Weighted.");
+
+
+// XXJ: Option to turn the hazard recognizer off (getHazardType() then
+// always reports NoHazard), in order to check the value of it.
+static cl::opt<bool> Active("decgroups",
+                            cl::init(true), cl::Hidden);
+
+// Experimental option: Insert nops so that two multi cycle ops are not
+// put in the same decoder group.
+static cl::opt<bool> FPUdNops("fpudnops",
+  cl::init(true), cl::Hidden,
+  cl::desc("SystemZ: Insert nops to separate multi cycle ops."));
+
+SystemZDecodeGroupHazardRecognizer::
+SystemZDecodeGroupHazardRecognizer(const ScheduleDAG *DAG_) : DAG(DAG_) {
+  const SystemZSubtarget &ST =
+    static_cast<const SystemZSubtarget&>(DAG->MF.getSubtarget());
+  SchedModel.init(ST.getSchedModel(), &ST, ST.getInstrInfo());
+
+  // Set to 1 to indicate 'enabled'.
+  MaxLookAhead = 1;
+
+  // Find the index of the FPd unit, i.e. the processor resource with
+  // BufferSize 0 (enum values are currently not available). Use '<' so that
+  // a model without any resource kinds is simply skipped. FIXME: This would
+  // probably be better handled with a general modelling of stalling units.
+  FPD_RESOURCE_IDX = 0; // InvalidUnit
+  for (unsigned PIdx = 1, PEnd = SchedModel.getNumProcResourceKinds();
+       PIdx < PEnd; ++PIdx)
+    if (SchedModel.getProcResource(PIdx)->BufferSize == 0) {
+      assert (FPD_RESOURCE_IDX == 0 && "Cannot assume this FPd unit?");
+      FPD_RESOURCE_IDX = PIdx;
+    }
+
+  init();
+}
+
+bool SystemZDecodeGroupHazardRecognizer::
+isBFPMultiCycle(const SUnit *SU) const {
+  // Without an FPd unit in the sched model the answer is always false.
+  if (FPD_RESOURCE_IDX == 0)
+    return false;
+
+  const MCSchedClassDesc *Desc = getSchedClassDesc(SU);
+  if (Desc == nullptr)
+    return false;
+
+  for (auto Res = SchedModel.getWriteProcResBegin(Desc),
+            End = SchedModel.getWriteProcResEnd(Desc); Res != End; ++Res)
+    if (Res->ProcResourceIdx == FPD_RESOURCE_IDX)
+      return true;
+  return false;
+}
+
+
+// Return true if SU's instruction has more than three explicit register
+// use operands, which will limit the group to 2 uops instead of 3.
+bool SystemZDecodeGroupHazardRecognizer::
+hasPlus3Sources(const SUnit *SU) const {
+  unsigned ExplicitUses = 0;
+  for (const auto &Op : SU->getInstr()->operands())
+    ExplicitUses += (Op.isReg() && Op.isUse() && !Op.isImplicit()) ? 1 : 0;
+
+  return ExplicitUses > 3;
+}
+
+ScheduleHazardRecognizer::HazardType SystemZDecodeGroupHazardRecognizer::
+getHazardType(SUnit *m, int Stalls) {
+  // Report a hazard only if the recognizer is enabled (XXJ option) and
+  // the unit does not fit into the current decoder group.
+  const bool IsHazard = Active && !fitsIntoCurrentGroup(m);
+  return IsHazard ? Hazard : NoHazard;
+}
+
+void SystemZDecodeGroupHazardRecognizer::Reset() {
+  init(); // Re-initialize group state, statistics and resource counters.
+}
+
+void SystemZDecodeGroupHazardRecognizer::init() {
+  // Start out with an empty decoder group.
+  CurGroupSize = 0;
+  currGroupHasMultiCycleOp = false;
+
+  // Clear the statistics counters and the loop depth.
+  InsCount = 0;
+  GrpCount = 0;
+  MaxHeight = 0;
+  SPAccesses = 0;
+  MaxScheduledLatency = 0;
+  QueuedUnits = 0;
+  MaxQueued = 0;
+  Noops = 0;
+  Stalls = 0;
+  Groupers = 0;
+  LoopDepth = 0;
+
+  // One zeroed counter per processor resource kind of the sched model.
+  const unsigned NumKinds = SchedModel.getNumProcResourceKinds();
+  ProcResourceCounters.assign(NumKinds, 0);
+
+  DEBUG(CurGroupDbg = "";);
+}
+
+bool
+SystemZDecodeGroupHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
+  // Anything fits into an empty group.
+  if (CurGroupSize == 0)
+    return true;
+
+  // The group is not empty, so an instruction that must begin a group
+  // cannot be added to it.
+  const MCSchedClassDesc *Desc = getSchedClassDesc(SU);
+  const bool MustBeginGroup = (Desc != nullptr && Desc->BeginGroup);
+  if (MustBeginGroup)
+    return false;
+
+  unsigned NumUOps = getNumMicroOps(SU);
+
+  // With a scheduling class, the subtarget must provide at least one uop.
+  assert ( NumUOps > 0 &&
+           "Missing subtarget scheduler input for SU?");
+
+  // Multi-uop instructions begin a group; those were rejected above.
+  assert (NumUOps == 1 &&
+          "Instruction with multiple uops does not begin group?");
+
+  // EmitInstruction() handles a full group immediately, so there must be
+  // room left for SU here.
+  assert ((CurGroupSize + NumUOps <= 3) &&
+          "Expected non-full group!");
+
+  return true;
+}
+
+// Close the current decoder group (if it is non-empty) and start a new one.
+void SystemZDecodeGroupHazardRecognizer::nextGroup() {
+  const bool GroupIsEmpty = (CurGroupSize == 0);
+  if (!GroupIsEmpty) {
+    DEBUG(dumpCurrGroup("Completed decode group"));
+    DEBUG(CurGroupDbg = "";);
+
+    // Count the finished group and begin with an empty one.
+    ++GrpCount;
+    CurGroupSize = 0;
+    currGroupHasMultiCycleOp = false;
+
+    // Decrease every positive execution unit counter by one.
+    for (int &Counter : ProcResourceCounters)
+      if (Counter > 0)
+        --Counter;
+  }
+
+  DEBUG(dumpProcResourceCounters(););
+}
+
+#ifndef NDEBUG
+// Debug output
+void SystemZDecodeGroupHazardRecognizer::
+dumpSU(SUnit *SU, raw_ostream &OS) const{
+  OS << "SU(" << SU->NodeNum << ")";
+  OS << ":" << DAG->TII->getName(SU->getInstr()->getOpcode());
+
+  // Without a scheduling class there is nothing more to print.
+  const MCSchedClassDesc *Desc = getSchedClassDesc(SU);
+  if (Desc == nullptr)
+    return;
+
+  // Short tags for the units, e.g. Z13_FXUnit -> FXU. For each resource
+  // the first tag (without its '/') found in the resource name is printed.
+  static const char *const UnitTags[] = {"/FXU", "/VFU", "/FPU",
+                                         "/FPd", "/LSU", "/VBU"};
+
+  for (TargetSchedModel::ProcResIter
+         Res = SchedModel.getWriteProcResBegin(Desc),
+         End = SchedModel.getWriteProcResEnd(Desc); Res != End; ++Res) {
+    const std::string UnitName(
+      SchedModel.getProcResource(Res->ProcResourceIdx)->Name);
+    for (const char *Tag : UnitTags)
+      if (UnitName.find(Tag + 1) != std::string::npos) {
+        OS << Tag;
+        break;
+      }
+
+    if (Res->Cycles > 1)
+      OS << "(" << Res->Cycles << "cyc)";
+  }
+
+  // Number of uops and grouping properties of the scheduling class.
+  if (Desc->NumMicroOps > 1)
+    OS << "/" << Desc->NumMicroOps << "uops";
+
+  if (Desc->BeginGroup && Desc->EndGroup)
+    OS << "/GroupsAlone";
+  else if (Desc->BeginGroup)
+    OS << "/BeginsGroup";
+  else if (Desc->EndGroup)
+    OS << "/EndsGroup";
+}
+
+void SystemZDecodeGroupHazardRecognizer::
+dumpCurrGroup(std::string Msg) const {
+  dbgs() << "+++ " << Msg << ": ";
+  if (CurGroupDbg.empty()) {
+    dbgs() << " <empty>\n";
+    return;
+  }
+  // Print the group text followed by its size in uops.
+  dbgs() << "{ " << CurGroupDbg << " }" << " (" << CurGroupSize
+         << (CurGroupSize > 1 ? "uops":"uop") << ")\n";
+}
+
+void SystemZDecodeGroupHazardRecognizer::dumpProcResourceCounters() const {
+  // Only processor resources with a positive counter are listed.
+  const unsigned NumKinds = SchedModel.getNumProcResourceKinds();
+  for (unsigned Idx = 0; Idx != NumKinds; ++Idx)
+    if (ProcResourceCounters[Idx] > 0)
+      dbgs() << "+++ Extra schedule for execution unit "
+             << SchedModel.getProcResource(Idx)->Name
+             << ": " << ProcResourceCounters[Idx] << "\n";
+}
+#endif
+
+// Update statistics after scheduling: add the counters collected by this
+// recognizer to the global statistics.
+void SystemZDecodeGroupHazardRecognizer::doStats() {
+  // Nothing is recorded if at most one instruction was counted.
+  if (InsCount <= 1)
+    return;
+
+  // Close a still open decoder group, so that it is included in the
+  // group count.
+  if (CurGroupSize)
+    nextGroup();
+
+  // Print average number of instructions per decoder group for region.
+  DEBUG(char Tmp[16];
+        float Ratio = ((float) InsCount / GrpCount);
+        sprintf(Tmp, "%.3f", Ratio);
+        dbgs() << "+++ stats: " << InsCount
+        << " instructions, " << GrpCount << " decoder group(s), Ratio:  "
+        << Tmp << ", DAG height: " << MaxHeight << ", SP: "
+        << SPAccesses << "\n";);
+
+  // Plain totals.
+  NumInstrs += InsCount;
+  NumDecoderGroups += GrpCount;
+  NumSPAccesses += SPAccesses;
+  NumDAGHeights += MaxHeight;
+  RegionLatency += MaxScheduledLatency;
+  ExecUnitsQueues += QueuedUnits;
+  MaxExecUnitQueue += MaxQueued;
+  NumNoops += Noops;
+  NumStalls += Stalls;
+  NumGroupers += Groupers;
+
+  // Loop weighted versions of the totals. The weight is the same for all
+  // of them, so it is computed once.
+  const unsigned Weight = getLoopWeight();
+  NumDecoderGroupsWeighted += GrpCount * Weight;
+  NumDAGHeightsWeighted += MaxHeight * Weight;
+  RegionLatencyWeighted += MaxScheduledLatency * Weight;
+  ExecUnitsQueuesWeighted += QueuedUnits * Weight;
+  MaxExecUnitQueueWeighted += MaxQueued * Weight;
+  NumStallsWeighted += Stalls * Weight;
+  NumGroupersWeighted += Groupers * Weight;
+}
+
+// Take SU as the next instruction and update group state and statistics.
+void SystemZDecodeGroupHazardRecognizer::EmitInstruction(SUnit *SU) {
+  const MCSchedClassDesc *SC = getSchedClassDesc(SU);
+  assert (fitsIntoCurrentGroup(SU) && "Emitted SU does not fit in group?");
+
+  DEBUG(dumpCurrGroup("Decode group before emission"););
+  DEBUG(dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
+        dbgs() << "\n";
+        raw_string_ostream cgd(CurGroupDbg);
+        if (CurGroupDbg.length())
+          cgd << ", ";
+        dumpSU(SU, cgd));
+
+  // Statistics: instruction count and the largest height seen.
+  ++InsCount;
+  const unsigned Height = SU->getHeight();
+  if (MaxHeight < Height)
+    MaxHeight = Height;
+
+  // Statistics: loads / stores that read R15D.
+  const MachineInstr *MI = SU->getInstr();
+  if ((MI->mayLoad() || MI->mayStore()) && MI->readsRegister(SystemZ::R15D))
+    ++SPAccesses;
+
+  // Statistics: current decoder cycle plus the height of SU, maximized.
+  const unsigned LatencyToEnd = GrpCount / numGroupsPerCycle() + Height;
+  if (MaxScheduledLatency < LatencyToEnd)
+    MaxScheduledLatency = LatencyToEnd;
+
+  // Statistics: resource cycles of SU that get queued.
+  unsigned ResQueued, MaxQ;
+  resourcesQueued(SU, ResQueued, MaxQ);
+  QueuedUnits += ResQueued;
+  if (MaxQueued < MaxQ)
+    MaxQueued = MaxQ;
+
+  // Statistics: instructions that begin and / or end a group.
+  if (SC != nullptr && (SC->BeginGroup || SC->EndGroup))
+    ++Groupers;
+
+  // Keep track of number of uops in current group.
+  CurGroupSize += getNumMicroOps(SU);
+
+  // A second blocking multi cycle op in a group adds 8 to Stalls.
+  if (isBFPMultiCycle(SU)) {
+    Stalls += (currGroupHasMultiCycleOp ? 8 : 0);
+    currGroupHasMultiCycleOp = true;
+  }
+
+  // Increase counter for execution unit(s).
+  if (SC != nullptr)
+    for (auto PI = SchedModel.getWriteProcResBegin(SC),
+              PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI)
+      ProcResourceCounters[PI->ProcResourceIdx] += PI->Cycles;
+
+  // End the group if full (ops with more than 3 uops or a dynamic number of
+  // uops are not handled any further than this) or if SU must end a group.
+  if (CurGroupSize >= 3 || (SC != nullptr && SC->EndGroup))
+    nextGroup();
+}
+
+// If SU is a blocking multi cycle (FPd) op and the current decoder group
+// already holds one, return the number of nops needed to fill up the
+// group so that SU lands in the next one. The sched strategy inserts the
+// nops right after this returns and then calls EmitInstruction().
+unsigned SystemZDecodeGroupHazardRecognizer::PreEmitNoops(SUnit *SU) {
+  const bool SeparateFPd =
+    FPUdNops && currGroupHasMultiCycleOp && isBFPMultiCycle(SU);
+  if (!SeparateFPd)
+    return 0;
+
+  const unsigned FreeSlots = 3 - CurGroupSize;
+  nextGroup();
+  Noops += FreeSlots;
+  return FreeSlots;
+}
+
+bool SystemZDecodeGroupHazardRecognizer::
+newGroupAndSUMustBegin(const SUnit *SU) const {
+  // True if the current group is empty and SU must begin a group.
+  const MCSchedClassDesc *Desc =
+    (CurGroupSize == 0 ? getSchedClassDesc(SU) : nullptr);
+  return (Desc != nullptr && Desc->BeginGroup);
+}
+
+bool SystemZDecodeGroupHazardRecognizer::
+mustEndSUWouldCompleteGroup(const SUnit *SU) const {
+  const MCSchedClassDesc *Desc =
+    (CurGroupSize != 0 ? getSchedClassDesc(SU) : nullptr);
+  if (Desc == nullptr || Desc->BeginGroup || !Desc->EndGroup)
+    return false;
+  return CurGroupSize + getNumMicroOps(SU) == 3; // Fills the group exactly.
+}
+
+int SystemZDecodeGroupHazardRecognizer::
+groupingCost(const SUnit *SU) const {
+  // These two cases get the lowest cost, -1.
+  if (newGroupAndSUMustBegin(SU) || mustEndSUWouldCompleteGroup(SU))
+    return -1;
+
+  const MCSchedClassDesc *Desc = getSchedClassDesc(SU);
+  if (Desc == nullptr)
+    return 0;
+
+  // Slots remaining in a non-empty group if SU must begin a group.
+  int Wasted = (Desc->BeginGroup && CurGroupSize) ? (3 - CurGroupSize) : 0;
+  // Slots remaining below 3 if SU ends the group; replaces the above.
+  if (Desc->EndGroup) {
+    const unsigned SizeWithSU = CurGroupSize + getNumMicroOps(SU);
+    if (SizeWithSU < 3)
+      Wasted = 3 - SizeWithSU;
+  }
+
+  return Wasted;
+}
+
+// Return true if the current group already contains a multi cycle
+// stalling instruction and SU is another one.
+bool SystemZDecodeGroupHazardRecognizer::
+multiCycleStallInGroup(const SUnit *SU) const {
+  return currGroupHasMultiCycleOp ? isBFPMultiCycle(SU) : false;
+}
+
+// Sum up the current counters of all processor resources used by SU,
+// i.e. the cycles queued on them before SU is scheduled.
+unsigned SystemZDecodeGroupHazardRecognizer::
+resourcesCost(const SUnit *SU) const {
+  const MCSchedClassDesc *Desc = getSchedClassDesc(SU);
+  if (Desc == nullptr)
+    return 0;
+
+  unsigned Queued = 0;
+  for (auto Res = SchedModel.getWriteProcResBegin(Desc),
+            End = SchedModel.getWriteProcResEnd(Desc); Res != End; ++Res)
+    Queued += ProcResourceCounters[Res->ProcResourceIdx];
+  return Queued;
+}
+
+// At the point of scheduling SU, check how many resource cycles of SU
+// will be queued: ResQueued is the total, MaxQ the longest single queue.
+void SystemZDecodeGroupHazardRecognizer::
+resourcesQueued(const SUnit *SU, unsigned &ResQueued, unsigned &MaxQ) const {
+  ResQueued = 0;
+  MaxQ = 0;
+  const MCSchedClassDesc *SC = getSchedClassDesc(SU);
+  if (SC == nullptr)
+    return;
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel.getWriteProcResBegin(SC),
+         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+    const unsigned Busy = ProcResourceCounters[PI->ProcResourceIdx] + PI->Cycles;
+    if (Busy <= 1)
+      continue;
+    const unsigned Queue = Busy - 1; // Everything beyond one cycle is queued.
+    ResQueued += Queue;
+    if (Queue > MaxQ) // Longest queue on one resource, not the running sum.
+      MaxQ = Queue;
+  }
+}
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -426,6 +426,9 @@
   MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const
     override;
+
+  const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
+
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   bool allowTruncateForTailCall(Type *, Type *) const override;
   bool mayBeEmittedAsTailCall(CallInst *CI) const override;
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -27,6 +27,10 @@
 
 #define DEBUG_TYPE "systemz-lower"
 
+// Experimental: scheduling preference name checked for non-z10 subtargets.
+cl::opt<std::string> SchedPref("schedpref", cl::Hidden, cl::init("source"),
+        cl::desc("Experimental: SystemZ SchedPref"));
+
 namespace {
 // Represents a sequence for extracting a 0/1 value from an IPM result:
 // (((X ^ XORValue) + AddValue) >> Bit)
@@ -116,11 +120,21 @@
   // Set up special registers.
   setStackPointerRegisterToSaveRestore(SystemZ::R15D);
 
-  // TODO: It may be better to default to latency-oriented scheduling, however
-  // LLVM's current latency-oriented scheduler can't handle physreg definitions
-  // such as SystemZ has with CC, so set this to the register-pressure
-  // scheduler, because it can.
-  setSchedulingPreference(Sched::RegPressure);
+  // XXJ Experimental.
+  if (Subtarget.isZ10())
+    setSchedulingPreference(Sched::RegPressure);
+  else {
+    if (SchedPref=="source")
+      setSchedulingPreference(Sched::Source);
+    else if (SchedPref=="hybrid")
+      setSchedulingPreference(Sched::Hybrid);
+    else if (SchedPref=="ilp")
+      setSchedulingPreference(Sched::ILP);
+    else if (SchedPref=="regpress")
+      setSchedulingPreference(Sched::RegPressure);
+    else
+      llvm_unreachable("bad schedpref string");
+  }
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -5927,3 +5941,14 @@
     llvm_unreachable("Unexpected instr type to insert");
   }
 }
+
+const TargetRegisterClass *SystemZTargetLowering::
+getRepRegClassFor(MVT VT) const {
+  // Every type but 'untyped' is left to the generic implementation.
+  if (VT != MVT::Untyped)
+    return TargetLowering::getRepRegClassFor(VT);
+  // Integer and vector registers can unfortunately not be told apart
+  // here. Do they both need to be 'untyped'? (This is called if the
+  // ilp-list scheduler is used.)
+  return &SystemZ::GR128BitRegClass;
+}
Index: lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.h
+++ lib/Target/SystemZ/SystemZInstrInfo.h
@@ -17,6 +17,7 @@
 #include "SystemZ.h"
 #include "SystemZRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 
 #define GET_INSTRINFO_HEADER
 #include "SystemZGenInstrInfo.inc"
@@ -117,6 +118,7 @@
 class SystemZInstrInfo : public SystemZGenInstrInfo {
   const SystemZRegisterInfo RI;
   SystemZSubtarget &STI;
+  TargetSchedModel SchedModel;
 
   void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
   void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
@@ -151,6 +153,13 @@
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         DebugLoc DL) const override;
+  ScheduleHazardRecognizer*
+  CreateTargetMIHazardRecognizer(const InstrItineraryData*,
+                                 const ScheduleDAG *DAG) const override;
+
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+
   bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
   bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
@@ -192,6 +201,8 @@
                                       MachineBasicBlock::iterator InsertPt,
                                       MachineInstr *LoadMI) const override;
   bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
+  bool hasFoldableOperand(const MachineInstr *MI, unsigned reg = 0) const
+    override;
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
     override;
 
@@ -240,6 +251,16 @@
   void loadImmediate(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MBBI,
                      unsigned Reg, uint64_t Value) const;
+
+  // Sometimes, it is possible for the target to tell, even without
+  // aliasing information, that two MIs access different memory
+  // addresses. This function returns true if two MIs access different
+  // memory addresses and false otherwise.
+  bool
+  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
+  int getMemOpcode(unsigned opc) const;
 };
 } // end namespace llvm
 
Index: lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -14,8 +14,11 @@
 #include "SystemZInstrInfo.h"
 #include "SystemZInstrBuilder.h"
 #include "SystemZTargetMachine.h"
+#include "SystemZHazardRecognizer.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 
 using namespace llvm;
 
@@ -23,6 +26,8 @@
 #define GET_INSTRMAP_INFO
 #include "SystemZGenInstrInfo.inc"
 
+#define DEBUG_TYPE "systemz-instr-info"
+
 // Return a mask with Count low bits set.
 static uint64_t allOnes(unsigned int Count) {
   return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
@@ -43,6 +48,8 @@
 SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
   : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
     RI(), STI(sti) {
+
+  SchedModel.init(STI.getSchedModel(), &STI, STI.getInstrInfo());
 }
 
 // MI is a 128-bit load or store.  Split it into two 64-bit loads or stores,
@@ -359,6 +366,27 @@
 }
 
 bool SystemZInstrInfo::
+hasFoldableOperand(const MachineInstr *MI, unsigned reg) const {
+  // Without a mapping to a memory-opcode there is nothing to fold.
+  const bool HasMemOpcode = SystemZ::getMemOpcode(MI->getOpcode()) != -1;
+  if (!HasMemOpcode)
+    return false;
+
+  // MI can fold one of its operands in case that operand register
+  // gets spilled. With reg being 0 it is not known which operand that
+  // might be, but the mischeduler TryCandidate() can help things a bit
+  // generally by putting MI a bit lower in the final schedule.
+  if (reg == 0)
+    return true;
+
+  // With reg given, check if the last explicit operand is that reg.
+  // CalcSpillWeights will then decrease its spill cost estimate.
+  const unsigned LastIdx = MI->getNumExplicitOperands() - 1;
+  const MachineOperand &LastOp = MI->getOperand(LastIdx);
+  return LastOp.isReg() && LastOp.getReg() == reg;
+}
+
+bool SystemZInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 2 && "Invalid condition");
   Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm());
@@ -402,6 +430,24 @@
   return Count;
 }
 
+ScheduleHazardRecognizer* SystemZInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData*,
+                               const ScheduleDAG *DAG) const {
+  // Decoder groups are tracked only if there are no virtual registers
+  // (left) and the subtarget has an instruction scheduling model.
+  const bool HasVirtRegs = DAG->MRI.getNumVirtRegs() != 0;
+  if (HasVirtRegs || !SchedModel.hasInstrSchedModel())
+    // Dummy hazard recognizer allows all instructions to issue.
+    return new ScheduleHazardRecognizer();
+  return new SystemZDecodeGroupHazardRecognizer(DAG);
+}
+
+void SystemZInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MI) const {
+  const unsigned Reg = SystemZ::R0L; // The no-op is an LR of R0L to itself.
+  BuildMI(MBB, MI, DebugLoc(), get(SystemZ::LR), Reg).addReg(Reg);
+}
+
 bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
                                       unsigned &SrcReg, unsigned &SrcReg2,
                                       int &Mask, int &Value) const {
@@ -1292,3 +1338,46 @@
   }
   BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
 }
+
+bool SystemZInstrInfo::
+areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+                                AliasAnalysis *AA) const {
+  // Without exactly one memory operand each, nothing can be concluded.
+  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+    return false;
+
+  // If mem-operands show that the same address Value is used by both
+  // instructions, check for non-overlapping offsets and widths. Not
+  // sure if a register based analysis would be an improvement...
+
+  MachineMemOperand *MMOa = *MIa->memoperands_begin();
+  MachineMemOperand *MMOb = *MIb->memoperands_begin();
+  const Value *VALa = MMOa->getValue();
+  const Value *VALb = MMOb->getValue();
+  bool SameVal = (VALa && VALb && (VALa == VALb));
+  if (!SameVal) {
+    const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+    const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+    if (PSVa && PSVb && (PSVa == PSVb))
+      SameVal = true;
+  }
+  if (SameVal) {
+    // Use 64 bits: offsets and sizes do not fit into an int in general.
+    int64_t OffsetA = MMOa->getOffset(), OffsetB = MMOb->getOffset();
+    int64_t WidthA = MMOa->getSize(), WidthB = MMOb->getSize();
+    int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+    int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+    int64_t LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+    if (LowOffset + LowWidth <= HighOffset)
+      return true;
+  }
+  return false;
+}
+
+// A wrapper around the generated SystemZ::getMemOpcode(), since the
+// generated function can't be included and built twice. XXJ Remove?
+int SystemZInstrInfo::
+getMemOpcode(unsigned opc) const {
+  return SystemZ::getMemOpcode(opc);
+}
+
Index: lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.td
+++ lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1222,7 +1222,7 @@
 // A serialization instruction that acts as a barrier for all memory
 // accesses, which expands to "bcr 14, 0".
 let hasSideEffects = 1 in
-def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
+  def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
 
 let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
   def LAA   : LoadAndOpRSY<"laa",   0xEBF8, atomic_load_add_32, GR32>;
Index: lib/Target/SystemZ/SystemZMachineScheduler.h
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -0,0 +1,116 @@
+//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SystemZ Machine Scheduler interface
+// This scheduler is run after register allocation (post-RA).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZHazardRecognizer.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Support/Debug.h"
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+
+using namespace llvm;
+
+namespace llvm {
+  
+/// Top-down post-RA strategy that schedules by height and fills decoder groups.
+class SystemZPostRASchedStrategy : public MachineSchedStrategy {
+  struct Candidate {
+    SUnit *SU;
+
+    // GroupingCost is negative if it would be a win to schedule this
+    // SU next, or positive if it would break the group early.
+    int GroupingCost;
+
+    bool MultiCycleStall;
+    unsigned ResourcesCost;
+    Candidate() : SU(nullptr), GroupingCost(0), MultiCycleStall(false),
+                  ResourcesCost(0) {}
+    Candidate(SUnit *SU_, SystemZDecodeGroupHazardRecognizer *HazRec,
+              bool NegGroupingCost);
+
+    // Compare two candidates.
+    bool operator<(const Candidate &other);
+
+    // Check if this node is as good as it could be.
+    bool noCost() const {
+      return (GroupingCost <= 0 && !MultiCycleStall && !ResourcesCost);
+    }
+  };
+
+  // Keep all available SUs in a set sorted by their heights.
+  struct SUSorter {
+    bool operator() (const SUnit *lhs, const SUnit *rhs) const {
+      if (lhs->getHeight() > rhs->getHeight())
+        return true;
+      else if (lhs->getHeight() < rhs->getHeight())
+        return false;
+      return (lhs->NodeNum < rhs->NodeNum);
+    }
+  };
+  struct SUSet : std::set<SUnit*, SUSorter> {
+    #ifndef NDEBUG
+    void dump(SystemZDecodeGroupHazardRecognizer *HazRec);
+    #endif
+  };
+
+  ScheduleDAGMI *DAG;
+
+  const MachineLoopInfo *MLI;
+  // Loop of current region, or nullptr
+  const MachineLoop *Loop;
+
+  // All available nodes, sorted by height.
+  SUSet Available;
+
+  // HazardRecognizer that tracks decoder groups.
+  SystemZDecodeGroupHazardRecognizer *HazRec;
+public:
+  SystemZPostRASchedStrategy(const MachineSchedContext *C) :
+    DAG(nullptr), MLI(C->MLI), Loop(nullptr), HazRec(nullptr) {}
+  virtual ~SystemZPostRASchedStrategy() { delete HazRec; }
+
+  /// Called before each region
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override;
+
+  /// Region is done: write stats. HazRec is null until initialize() has run.
+  void leaveRegion() override { if (HazRec) HazRec->doStats(); }
+
+  /// PostRA scheduling does not track pressure.
+  bool shouldTrackPressure() const override { return false; }
+
+  /// Initialize the strategy after building the DAG for a new region.
+  void initialize(ScheduleDAGMI *dag) override;
+
+  /// Pick the next node to schedule, or return NULL.
+  SUnit *pickNode(bool &IsTopNode) override;
+
+  /// ScheduleDAGMI has scheduled an instruction - tell HazardRec
+  /// about it.
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+
+  /// SU has had all predecessor dependencies resolved. Put it into
+  /// Available.
+  void releaseTopNode(SUnit *SU) override;
+
+  /// Currently only scheduling top-down, so this method is empty.
+  void releaseBottomNode(SUnit *SU) override {}
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -0,0 +1,288 @@
+//==- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*--==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SystemZ Machine Scheduler interface
+// This scheduler is run after register allocation (post-RA).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineScheduler.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// XXJ option which reverses heuristics by reversing the candidate
+// evaluation score.
+static cl::opt<bool> DoMyWorst("domyworst", cl::Hidden,
+                               cl::init(false));
+
+// XXJ option to make scheduler output same order, to get hazard
+// recognizer statistics without scheduling.
+static cl::opt<bool> SchedNoChange("schednochange", cl::Hidden,
+                                   cl::init(false));
+
+// XXJ option to make scheduler output a random ("unscheduled") order.
+#include <stdlib.h>     /* srand, rand */
+#include <time.h>       /* time */
+static cl::opt<bool> SchedRandom("schedrandom", cl::Hidden,
+                                 cl::init(false));
+
+// XXJ option to make scheduler just schedule per height
+static cl::opt<bool> SchedByHeight("schedheight", cl::Hidden,
+                                 cl::init(false));
+// XXJ option to leave the resources cost out of the candidate evaluation.
+static cl::opt<bool> DisableResources("sched-noresources", cl::Hidden,
+                                 cl::init(false));
+
+
+// ------------------------ Post RA scheduling ---------------------------- //
+// SystemZPostRASchedStrategy is a scheduling strategy which is
+// plugged into the MachineScheduler. It has an Available set of SUs
+// sorted by height, and a pickNode() implementation that schedules by
+// height while also filling decoder groups and balancing the use of
+// resources.
+
+#ifndef NDEBUG
+// Dump all SUs of the set to dbgs(), as "{SU,  SU,  ...}".
+void SystemZPostRASchedStrategy::SUSet::
+dump(SystemZDecodeGroupHazardRecognizer *HazRec) {
+  const char *Separator = "{";
+  for (SUnit *Node : *this) {
+    dbgs() << Separator;
+    HazRec->dumpSU(Node, dbgs());
+    Separator = ",  ";
+  }
+  dbgs() << (empty() ? "{}\n" : "}\n");
+}
+#endif
+
+void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
+                                            MachineBasicBlock::iterator End,
+                                            unsigned NumRegionInstrs) {
+  // Remember the loop (if any) that contains the block of this region.
+  Loop = MLI->getLoopFor(Begin->getParent());
+}
+
+void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
+  DAG = dag;
+  // Create the hazard recognizer for the first region, reuse it afterwards.
+  if (HazRec != nullptr)
+    HazRec->Reset();
+  else
+    HazRec = new SystemZDecodeGroupHazardRecognizer(DAG);
+  HazRec->setLoopDepth(Loop != nullptr ? Loop->getLoopDepth() : 0);
+}
+
+// Pick the next node to schedule, or return nullptr if none is available.
+SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
+  // Only scheduling top-down.
+  IsTopNode = true;
+
+  // All nodes that are possible to schedule are stored by height in
+  // the Available set. This includes any node with all predecessors
+  // scheduled.
+  DEBUG(dbgs() << "Available: "; Available.dump(HazRec););
+  if (Available.empty())
+    return nullptr;
+
+  SUnit *Next = nullptr;
+
+  // If only one choice, return it (in a new decoder group if needed).
+  if (Available.size() == 1) {
+    Next = *Available.begin();
+    Available.erase(Next);
+    if (HazRec->getHazardType(Next) != ScheduleHazardRecognizer::NoHazard)
+      HazRec->nextGroup();
+    return Next;
+  }
+
+  // Experimental: Output a random order schedule ("no scheduling").
+  // NOTE(review): srand() is called again on every pick -- confirm intended.
+  if (SchedRandom) {
+    srand (time(NULL));
+    int num = rand() % Available.size();
+    SUSet::const_iterator it(Available.begin());
+    advance(it, num);   
+    Next = *it;
+    Available.erase(Next);
+    if (HazRec->getHazardType(Next) != ScheduleHazardRecognizer::NoHazard)
+      HazRec->nextGroup();
+    return Next;
+  }
+
+  // Experimental: Output an unchanged order of instructions, in order
+  // to get statistics for it.
+  if (SchedNoChange) {
+    SUnit *NextNodeNum = nullptr;
+    for (auto *SU : Available)
+      if (NextNodeNum == nullptr ||
+          SU->NodeNum < NextNodeNum->NodeNum)
+        NextNodeNum = SU;
+    Available.erase(NextNodeNum);
+    if (HazRec->getHazardType(NextNodeNum) != ScheduleHazardRecognizer::NoHazard)
+      HazRec->nextGroup();
+    return NextNodeNum;
+  }
+
+  Candidate Best;
+  unsigned NumChecked = 0;
+  SUnit *HighestSU = *Available.begin();
+  for (auto *SU : Available) {
+    // XXX Schedule high stalling SUs : SU->hasReservedResource
+
+    // Check with HazRec if this SU fits into current decoder group.
+    if (HazRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard)
+      continue;
+
+    // Experimental: If scheduling by height only, return first SU
+    // that fits into current decoder group.
+    if (SchedByHeight) {
+      Next = SU;
+      break;
+    }
+
+    // SU is the next candidate, with cost values that are compared to
+    // other SUs. Always check five highest SUs in case there is an SU
+    // that must begin / end a group that would be preferable to
+    // schedule now.
+    Candidate c(SU, HazRec, (NumChecked++ < 5) /* check begin/end group */);
+    if (c.GroupingCost < 0) {
+      Next = SU;
+      break;
+    }
+
+    // Remember which SU is the best candidate.
+    if (Best.SU == nullptr || c < Best) {
+      Best = c;
+      DEBUG(dbgs() << "Best sofar: ";
+            HazRec->dumpSU(Best.SU, dbgs());
+            dbgs() << "\tGrouping cost:" << c.GroupingCost;
+            if (Best.MultiCycleStall)
+              dbgs() << " <FPd> ";
+            else
+              dbgs() << "       ";
+            dbgs() << " Resource cost:" << Best.ResourcesCost
+            << " Height:" << Best.SU->getHeight() << "\n";);
+    }
+
+    // If more than five SUs have been checked, there was no SU that
+    // must begin or end current decoder group.
+    if (NumChecked > 5) {
+      // If there is an SU which has no cost, return it.
+      if (Best.noCost())
+        break;
+      // If all SUs that are of about the same height have been
+      // checked, return the best one.
+      if ((HighestSU->getHeight() - SU->getHeight()) > 1)
+        break;
+    }
+  }
+  if (Next == nullptr && Best.SU != nullptr)
+    Next = Best.SU;
+
+  // HazRec has rejected them all. Start a new decoder group and try
+  // again.
+  if (Next == nullptr) {
+    HazRec->nextGroup();
+    return pickNode(IsTopNode);
+  }
+
+  assert (Next != nullptr && "SU lost?");
+  Available.erase(Next);
+  return Next;
+}
+
+SystemZPostRASchedStrategy::Candidate::
+Candidate(SUnit *SU_, SystemZDecodeGroupHazardRecognizer *HazRec,
+          bool NegGroupingCost) {
+  SU = SU_;
+
+  // Check the grouping cost. For a node that must begin / end a
+  // group, it is positive if it would do so prematurely, or negative
+  // if it would fit naturally into the schedule.
+  GroupingCost = HazRec->groupingCost(SU);
+  // Only look for naturally fitting SUs within a certain
+  // "look-ahead". After that, it is known that they get a new chance
+  // since the current decoder group will be completed.
+  if (GroupingCost < 0 && !NegGroupingCost)
+    GroupingCost = 0;
+
+  // Check if this would be a second multi cycle into current group.
+  MultiCycleStall = HazRec->multiCycleStallInGroup(SU);
+
+  // Check the resources cost for this SU. It must be zero, and not left
+  // uninitialized, when resource balancing has been disabled.
+  ResourcesCost = DisableResources ? 0 : HazRec->resourcesCost(SU);
+}
+
+bool SystemZPostRASchedStrategy::Candidate::
+operator<(const Candidate &other) {
+  // Returns true if this candidate is to be preferred over 'other'.
+  // The criteria below are listed by decreasing priority, and the
+  // first one on which the two candidates differ decides.
+  //
+  // Experimental: DoMyWorst reverses the cost function, which is done
+  // here by swapping the two possible verdicts.
+  const bool Better = !DoMyWorst;
+  const bool Worse = !Better;
+
+  // Check first for decoder grouping.
+  if (GroupingCost != other.GroupingCost)
+    return (GroupingCost < other.GroupingCost) ? Better : Worse;
+
+  // Avoid two multicycle ops in same group.
+  if (MultiCycleStall != other.MultiCycleStall)
+    return MultiCycleStall ? Worse : Better;
+
+  // Compare the use of resources.
+  if (ResourcesCost != other.ResourcesCost)
+    return (ResourcesCost < other.ResourcesCost) ? Better : Worse;
+
+  // Higher SU is generally better.
+  const unsigned Height = SU->getHeight();
+  const unsigned OtherHeight = other.SU->getHeight();
+  if (Height != OtherHeight)
+    return (Height > OtherHeight) ? Better : Worse;
+
+  // If all same, keep original order. Candidates that are equal in
+  // every respect, including their NodeNum, also give the 'Worse'
+  // verdict.
+  if (SU->NodeNum < other.SU->NodeNum)
+    return Better;
+
+  return Worse;
+}
+
+void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  DEBUG(dbgs() << "** Scheduling ";
+        SU->dump(DAG));
+
+  // The scheduler has already moved SU. If it is an FPd (blocking
+  // multicycle) SU, nop(s) are emitted in front of it so that it
+  // ends up in the next decoder group.
+  MachineInstr *MI = SU->getInstr();
+  for (unsigned Left = HazRec->PreEmitNoops(SU); Left != 0; --Left) {
+    DAG->TII->insertNoop(*MI->getParent(), MI);
+    DEBUG(dbgs() << "** Inserting NOOP\n");
+  }
+
+  HazRec->EmitInstruction(SU);
+}
+
+// Put all released SUs in the Available set (kept sorted by height).
+// There is no Pending set for nodes which are not ready on the current
+// cycle, since we are primarily filling decoder groups, and will put an
+// instruction into an available decoder slot even if it was not ready.
+void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
+  Available.insert(SU);
+}
+
Index: lib/Target/SystemZ/SystemZProcessors.td
===================================================================
--- lib/Target/SystemZ/SystemZProcessors.td
+++ lib/Target/SystemZ/SystemZProcessors.td
@@ -78,17 +78,18 @@
 
 def : Processor<"generic", NoItineraries, []>;
 def : Processor<"z10", NoItineraries, []>;
-def : Processor<"z196", NoItineraries,
+def : ProcessorModel<"z196", Z196Model,
                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
                  FeatureFPExtension, FeaturePopulationCount,
                  FeatureFastSerialization, FeatureInterlockedAccess1]>;
-def : Processor<"zEC12", NoItineraries,
+def : ProcessorModel<"zEC12", ZEC12Model,
                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
                  FeatureFPExtension, FeaturePopulationCount,
                  FeatureFastSerialization, FeatureInterlockedAccess1,
                  FeatureMiscellaneousExtensions,
                  FeatureTransactionalExecution, FeatureProcessorAssist]>;
-def : Processor<"z13", NoItineraries,
+
+def : ProcessorModel<"z13", Z13Model,
                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
                  FeatureFPExtension, FeaturePopulationCount,
                  FeatureFastSerialization, FeatureInterlockedAccess1,
Index: lib/Target/SystemZ/SystemZSchedule.td
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZSchedule.td
@@ -0,0 +1,70 @@
+//==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduler resources
+
+// These three resources are used to express decoder grouping rules.
+def GroupAlone : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup   : SchedWrite;
+
+// These resources model a decoder group slot and execution unit with
+// latency. If more than one of these are used for an instruction, the
+// greatest latency will result, and the number of uops will be added,
+// although a resource may have 0 NumMicroOps.
+
+// Fixed-point
+def FXU         : SchedWrite;
+def FXU_2cyc    : SchedWrite;
+def FXU_3cyc    : SchedWrite;
+def FXU_4cyc    : SchedWrite;
+def FXU_5cyc    : SchedWrite;
+def FXU_6cyc    : SchedWrite;
+def FXU_7cyc    : SchedWrite;
+def FXU_8cyc    : SchedWrite;
+def FXU_9cyc    : SchedWrite;
+def FXU_15cyc   : SchedWrite;
+def FXU_20cyc   : SchedWrite;
+def FXU_30cyc   : SchedWrite;
+
+// Load-store
+def LSU         : SchedWrite;
+def LSU_2cyc    : SchedWrite;
+def LSU_5cyc    : SchedWrite;
+def LSU_6cyc    : SchedWrite;
+def LSU_20cyc   : SchedWrite;
+def LSU_30cyc   : SchedWrite;
+
+// Vector
+// B is defined as a "single pass through pipeline".
+def VFU_Bcyc       : SchedWrite;
+def VFU_Bplus1cyc  : SchedWrite;
+def VFU_Bplus2cyc  : SchedWrite;
+def VFU_15cyc   : SchedWrite;
+def VFU_20cyc   : SchedWrite;
+def VFU_30cyc   : SchedWrite;
+
+// Blocking BFP div/sqrt unit.
+def FPd_30cyc   : SchedWrite;
+
+// Virtual branching unit
+def VBU         : SchedWrite;
+
+// Floating point unit (zEC12 and earlier)
+def FPU_Bcyc    : SchedWrite;
+def FPU_Bplus1cyc    : SchedWrite;
+def FPU_Bplus2cyc    : SchedWrite;
+def FPU_15cyc   : SchedWrite;
+def FPU_20cyc   : SchedWrite;
+def FPU_30cyc   : SchedWrite;
+
+include "SystemZScheduleZ13.td"
+include "SystemZScheduleZEC12.td"
+include "SystemZScheduleZ196.td"
+
Index: lib/Target/SystemZ/SystemZScheduleZ13.td
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -0,0 +1,724 @@
+//-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z13Model : SchedMachineModel {
+    // Machine-wide parameters for the z13 scheduling model.
+    let IssueWidth = 6;             // 2 * 3 instructions decoded per cycle.
+    let MicroOpBufferSize = 60;     // Issue queues
+    let MinLatency = 0;             // Out-of-order; TODO confirm field exists.
+    let LoadLatency = 1;            // Optimistic load latency.
+
+    let PostRAScheduler = 1;
+
+    // Extra cycles for a mispredicted branch.
+    let MispredictPenalty = 8;
+
+    // Max micro-ops that can be buffered for
+    // optimized loop dispatch/execution.
+    let LoopMicroOpBufferSize = 12;
+
+    // This model does not include operand specific information.
+    let CompleteModel = 0;
+}
+
+let SchedModel = Z13Model in  {
+
+// Execution units. BufferSize controls when scheduler will start to
+// postpone scheduling of instructions using that particular unit.
+def Z13_VBUnit  : ProcResource<1>;
+def Z13_FXUnit  : ProcResource<2> { let BufferSize = 2; /* ooo */ }
+def Z13_LSUnit  : ProcResource<2> { let BufferSize = 2; /* ooo */ }
+def Z13_VFUnit  : ProcResource<2> { let BufferSize = 2; /* ooo */ }
+def Z13_FPdUnit : ProcResource<2> { let BufferSize = 0; /* blocking */ }
+
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+
+def : WriteRes<BeginGroup, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+}
+
+def : WriteRes<EndGroup, []> {
+  let NumMicroOps = 0;
+  let EndGroup    = 1;
+}
+
+// Subtarget specific definitions of scheduling resources.
+
+def : WriteRes<FXU,       [Z13_FXUnit]> { let Latency = 1; }
+def : WriteRes<FXU_2cyc,  [Z13_FXUnit]> { let Latency = 2; }
+def : WriteRes<FXU_3cyc,  [Z13_FXUnit]> { let Latency = 3; }
+def : WriteRes<FXU_4cyc,  [Z13_FXUnit]> { let Latency = 4; }
+def : WriteRes<FXU_5cyc,  [Z13_FXUnit]> { let Latency = 5; }
+def : WriteRes<FXU_6cyc,  [Z13_FXUnit]> { let Latency = 6; }
+def : WriteRes<FXU_7cyc,  [Z13_FXUnit]> { let Latency = 7; }
+def : WriteRes<FXU_8cyc,  [Z13_FXUnit]> { let Latency = 8; }
+def : WriteRes<FXU_9cyc,  [Z13_FXUnit]> { let Latency = 9; }
+def : WriteRes<FXU_15cyc, [Z13_FXUnit]> { let Latency = 15; }
+def : WriteRes<FXU_20cyc, [Z13_FXUnit]> { let Latency = 20; }
+def : WriteRes<FXU_30cyc, [Z13_FXUnit]> { let Latency = 30; }
+
+def : WriteRes<LSU,      [Z13_LSUnit]> { let Latency = 1; }
+def : WriteRes<LSU_2cyc, [Z13_LSUnit]> { let Latency = 2; }
+def : WriteRes<LSU_5cyc, [Z13_LSUnit]> { let Latency = 5; }
+def : WriteRes<LSU_6cyc, [Z13_LSUnit]> { let Latency = 6; }
+def : WriteRes<LSU_20cyc,[Z13_LSUnit]> { let Latency = 20; }
+def : WriteRes<LSU_30cyc,[Z13_LSUnit]> { let Latency = 30; }
+
+def : WriteRes<VFU_Bcyc,      [Z13_VFUnit]> { let Latency = 9; }
+def : WriteRes<VFU_Bplus1cyc, [Z13_VFUnit]> { let Latency = 10; }
+def : WriteRes<VFU_Bplus2cyc, [Z13_VFUnit]> { let Latency = 11; }
+def : WriteRes<VFU_15cyc,     [Z13_VFUnit]> { let Latency = 15; }
+def : WriteRes<VFU_20cyc,     [Z13_VFUnit]> { let Latency = 20; }
+def : WriteRes<VFU_30cyc,     [Z13_VFUnit]> { let Latency = 30; }
+
+// This should be modelled as using FPd for ~30 cycles, but that seems
+// bad since SchedBoundary would consider the FPd stall a global stall
+// and increase CurrCycle by 30.
+def : WriteRes<FPd_30cyc, [Z13_FPdUnit]> { let Latency = 30; }
+
+def : WriteRes<VBU, [Z13_VBUnit]>;
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//  Call
+def : InstRW<[VBU, FXU_2cyc, FXU, GroupAlone], (instregex "BRAS$")>;
+def : InstRW<[FXU_2cyc, FXU, FXU, GroupAlone], (instregex "(Call)?BASR$")>;
+def : InstRW<[FXU, EndGroup], (instregex "CallBR$")>;
+def : InstRW<[FXU_2cyc, FXU, FXU, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXU_2cyc, FXU, FXU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[FXU, EndGroup], (instregex "Return$")>;
+
+// Serialize
+def : InstRW<[FXU, EndGroup], (instregex "Serialize$")>;
+
+///// FIXED POINT
+
+// Addition
+def : InstRW<[FXU], (instregex "A(Y|IH|SI)?$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AG(SI)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU], (instregex "AL(Y|FI|HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALG(HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU_2cyc, GroupAlone], (instregex "ALC(R)?$")>;
+def : InstRW<[FXU_2cyc, GroupAlone], (instregex "ALCG(R)?$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "AGF(R)?$")>;
+
+// Add halfword
+def : InstRW<[FXU_2cyc], (instregex "AH(Y)?$")>;
+
+// Subtraction
+def : InstRW<[FXU], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SL(G|Y|FI)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU_2cyc, GroupAlone], (instregex "SLB(G|R)?$")>;
+def : InstRW<[FXU_2cyc, GroupAlone], (instregex "SLBGR$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "SGF(R)?$")>;
+
+// Subtract halfword
+def : InstRW<[FXU_2cyc], (instregex "SH(Y)?$")>;
+
+// Multiply
+def : InstRW<[FXU_6cyc], (instregex "MS(R|Y|FI)?$")>;
+def : InstRW<[FXU_8cyc], (instregex "MSG(R)?$")>;
+def : InstRW<[FXU_6cyc], (instregex "MSGF(I|R)?$")>;
+def : InstRW<[FXU_9cyc, GroupAlone], (instregex "MLG(R)?$")>;
+def : InstRW<[FXU_5cyc], (instregex "MGHI$")>;
+def : InstRW<[FXU_5cyc], (instregex "MH(I|Y)?$")>;
+
+// Divide
+def : InstRW<[FXU_30cyc, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[LSU, FXU_30cyc, GroupAlone], (instregex "DSG(F)?$")>;
+def : InstRW<[FXU_20cyc, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[FXU_30cyc, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[LSU, FXU_20cyc, GroupAlone], (instregex "DL$")>;
+def : InstRW<[LSU, FXU_30cyc, GroupAlone], (instregex "DLG$")>;
+
+// And
+def : InstRW<[FXU], (instregex "N(G|Y|TSTG)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+
+// Or
+def : InstRW<[FXU], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU], (instregex "OI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+
+// Xor
+def : InstRW<[FXU], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "X(G|Y|IFMux)?$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+
+// Insert
+def : InstRW<[FXU], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+// And / Or / Xor character
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "NC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "OC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "XC$")>;
+
+// Rotate
+def : InstRW<[FXU], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU_2cyc, BeginGroup], (instregex "R(N|O|X)SBG$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// Find leftmost one
+def : InstRW<[FXU_6cyc, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU_3cyc], (instregex "POPCNT$")>;
+
+// Compare
+def : InstRW<[FXU], (instregex "CG$")>;
+def : InstRW<[FXU], (instregex "C(Y|IH|Mux)?$")>; // CG is matched above.
+def : InstRW<[FXU], (instregex "CFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CGFI$")>;
+def : InstRW<[FXU], (instregex "CGH(I|SI)$")>;
+def : InstRW<[FXU], (instregex "CGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CH(I|F|SI)$")>;
+def : InstRW<[FXU], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I)?$")>;
+def : InstRW<[FXU], (instregex "CLGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLI(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR(L)?$")>;
+def : InstRW<[FXU], (instregex "CR(L)?$")>;
+
+// Compare halfword
+def : InstRW<[FXU_2cyc], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU_2cyc], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU, FXU, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "CGF(R|RL)?$")>;
+
+// Compare and swap
+def : InstRW<[FXU, FXU, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare logical character
+def : InstRW<[FXU, LSU, BeginGroup], (instregex "CLC$")>;
+
+// Test under mask
+def : InstRW<[FXU], (instregex "TM(Y|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+// Load and test
+def : InstRW<[FXU], (instregex "LT(R)?$")>;
+def : InstRW<[FXU], (instregex "LTG(R)?$")>;
+def : InstRW<[FXU], (instregex "LTGF(R)?$")>;
+
+// Moves
+def : InstRW<[FXU], (instregex "MVGHI$")>;
+def : InstRW<[FXU], (instregex "MVH(I|HI)$")>;
+def : InstRW<[FXU], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU_2cyc, LSU, FXU, BeginGroup], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads (LSU)
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLG(C|F|H|FRL|HRL)$")>;
+def : InstRW<[LSU], (instregex "LLH(RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "L(X|128)$")>;
+
+// Loads (FXU)
+def : InstRW<[FXU], (instregex "LLCH$")>;
+def : InstRW<[FXU], (instregex "LLHH$")>;
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|F|H)R$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+def : InstRW<[FXU], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU], (instregex "LAX(G)?$")>;
+def : InstRW<[FXU], (instregex "LB(H|R|Mux)?$")>;
+def : InstRW<[FXU], (instregex "LGR$")>;
+def : InstRW<[FXU], (instregex "LGB(R)?$")>;
+def : InstRW<[FXU], (instregex "LGF(I)?$")>;
+def : InstRW<[FXU], (instregex "LGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "LGH(I)?$")>;
+def : InstRW<[FXU], (instregex "LGHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LH(H|I|Y|Mux|IMux)?$")>;
+def : InstRW<[FXU], (instregex "LHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LRV(R)?$")>;
+def : InstRW<[FXU], (instregex "LRVG(R)?$")>;
+
+// Load GR from FPR
+def : InstRW<[FXU_3cyc], (instregex "LGDR$")>;
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU_5cyc, LSU, LSU, LSU, LSU, GroupAlone], (instregex "LMG$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU_2cyc], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LCGFR$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU_2cyc], (instregex "LP(G)?R$")>;
+
+// Load on condition
+def : InstRW<[FXU_2cyc], (instregex "LOC(R)?$")>;
+def : InstRW<[FXU_2cyc], (instregex "LOCG(R)?$")>;
+
+// Stores
+def : InstRW<[FXU], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU], (instregex "ST(X|128)$")>;
+def : InstRW<[FXU], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "ST(Y|FH|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STRV(G)?$")>;
+
+// Store on condition / CondStore pseudos
+def : InstRW<[FXU], (instregex "STOC(G)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+// Store multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, FXU_5cyc, FXU, FXU, FXU, FXU,
+              GroupAlone], (instregex "STMG$")>;
+
+// Select pseudo
+def : InstRW<[FXU], (instregex "Select(32|64|F32|F64|F128|32Mux)$")>;
+
+// String instructions
+def : InstRW<[FXU_30cyc], (instregex "SRST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "MVST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "CLST$")>;
+
+///// FLOATING POINT
+
+// Addition
+def : InstRW<[VFU_Bcyc], (instregex "AEB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "ADB(R)?$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[VFU_Bcyc], (instregex "SEB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "SDB(R)?$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[VFU_Bcyc], (instregex "MEEB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "MDB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "MDEB(R)?$")>;
+def : InstRW<[VFU_Bcyc, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[VFU_Bplus1cyc, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[VFU_20cyc, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VFU_Bcyc, GroupAlone], (instregex "MAEB(R)?$")>;
+def : InstRW<[VFU_Bcyc, GroupAlone], (instregex "MSEB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "M(A|S)DBR$")>;
+def : InstRW<[VFU_Bcyc, GroupAlone], (instregex "M(A|S)DB$")>;
+
+// Division
+def : InstRW<[FPd_30cyc], (instregex "DEB(R)?$")>;
+def : InstRW<[FPd_30cyc], (instregex "DDB(R)?$")>;
+def : InstRW<[FPd_30cyc, GroupAlone], (instregex "DXBR$")>;
+
+// Square root
+def : InstRW<[FPd_30cyc], (instregex "SQEB(R)?$")>;
+def : InstRW<[FPd_30cyc], (instregex "SQDB(R)?$")>;
+def : InstRW<[FPd_30cyc, GroupAlone], (instregex "SQXBR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CE(F|G)BR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CD(F|G)BR$")>;
+def : InstRW<[FXU, VFU_Bplus2cyc, GroupAlone], (instregex "CX(F|G)BR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, VFU_Bplus2cyc, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CF(E|D)BR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CG(E|D)BR$")>;
+def : InstRW<[FXU, VFU_Bplus1cyc, BeginGroup], (instregex "C(F|G)XBR$")>;
+def : InstRW<[FXU, VFU_Bcyc, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CLFDBR$")>;
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, VFU_Bplus1cyc, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+// Copy sign
+def : InstRW<[VFU_Bcyc], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "CPSDRs(d|s)$")>;
+
+// Compare
+def : InstRW<[VFU_Bcyc], (instregex "CEB(R)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "CDB(R)?$")>;
+def : InstRW<[VFU_Bplus1cyc, GroupAlone], (instregex "CXBR$")>;
+
+// Load and Test
+def : InstRW<[VFU_Bcyc], (instregex "LT(D|E)BR$")>;
+def : InstRW<[VFU_Bcyc], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone],
+             (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Load
+def : InstRW<[VFU_Bcyc], (instregex "LE(R|Y)?$")>;
+def : InstRW<[FXU], (instregex "LD(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, GroupAlone], (instregex "LXR$")>;
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LZXR$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[VFU_Bcyc], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[VFU_Bcyc], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Load lengthened
+def : InstRW<[VFU_Bcyc], (instregex "LDEB(R)?$")>;
+def : InstRW<[VFU_Bcyc, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[VFU_Bplus1cyc, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Load rounded
+def : InstRW<[VFU_Bcyc], (instregex "LEDBR(A)?$")>;
+def : InstRW<[VFU_Bplus1cyc], (instregex "LEXBR(A)?$")>;
+def : InstRW<[VFU_Bplus1cyc], (instregex "LDXBR(A)?$")>;
+
+// Load FP integer
+def : InstRW<[VFU_Bcyc], (instregex "FIEBR(A)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "FIDBR(A)?$")>;
+def : InstRW<[VFU_Bplus2cyc, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+// Store
+def : InstRW<[FXU_3cyc], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU_3cyc], (instregex "STE(Y)?$")>;
+
+///// VECTOR
+
+// Various
+def : InstRW<[VFU_Bcyc], (instregex "VA(B|F|G|H|Q|CQ)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VACC(B|F|G|H|Q|CQ)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VAVG(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VAVGL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCD(GB|LGB)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WCD(GB|LGB)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCEQB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCEQF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCEQG(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCEQH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCGDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WCGDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHG(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHLB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHLF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHLG(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCHLH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCKSM$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCLGDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WCLGDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCLZ(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VCTZ(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VEC(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VECL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VERIM(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VERLL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VERLLV(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESLV(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESRA(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESRAV(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESRL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VESRLV(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFA(D|E)B$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEBS$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEZB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEZF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFAEZH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFCEDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFCEDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFCHDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFCHDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFCHEDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFCHEDB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEZB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEZF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFEEZH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEZB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEZF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFENEZH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VF(I|M|S)DB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFL(C|N|P)DB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFL(C|N|P)DB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFM(A|S)DB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFM(A|S)DB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VFTCIDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WFTCIDB$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VGBM$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VGFMA(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VGM(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VISTRB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VISTRF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VISTRH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VLC(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VLEI(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VLP(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMAE(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMAH(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMAL(B|F)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMALE(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMALH(B|F|H|W)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMALO(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMAO(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VME(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMH(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VML(B|F)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMLE(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMLH(B|F|H|W)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMLO(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMN(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMNL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMO(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMRH(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMRL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMX(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VMXL(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VN(C|O)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VO(NE)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPDI$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPERM$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPK(F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKLSF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKLSG(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKLSH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKSF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKSG(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPKSH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VPOPCT$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VREP(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VREPI(B|F|G|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSB(IQ|CBIQ)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSCBI(B|F|G|H|Q)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSEG(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VS(F|G|H|Q|EL)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSL(DB)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSRA$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSRL$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCZB(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCZF(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSTRCZH(S)?$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSUM(B|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSUMG(F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VSUMQ(F|G)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VTM$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VUPH(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VUPL(B|F)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VUPLH(B|F|H|W)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VUPLL(B|F|H)$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VX$")>;
+def : InstRW<[VFU_Bcyc], (instregex "VZERO$")>;
+def : InstRW<[VFU_Bcyc], (instregex "WF(A|C|I|K|M|S)DB$")>;
+
+// Vector divide / square root
+def : InstRW<[FPd_30cyc], (instregex "(V|W)FDDB$")>;
+def : InstRW<[FPd_30cyc], (instregex "(V|W)FSQDB$")>;
+
+// Moving between GPR and FPR
+def : InstRW<[FXU], (instregex "VLVG(B|F|G|H)$")>;
+def : InstRW<[FXU], (instregex "LEFR$")>;      // Printed as VLVGF
+def : InstRW<[FXU_4cyc], (instregex "VLGV(B|F|G|H)$")>;
+def : InstRW<[FXU_4cyc], (instregex "LFER$")>; // Printed as VLGVF
+def : InstRW<[FXU_2cyc], (instregex "VLVGP(32)?$")>;
+
+// Load
+def : InstRW<[LSU_2cyc], (instregex "VL(L|BB)?$")>;
+def : InstRW<[LSU], (instregex "VL(32|64)$")>;
+def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H)$")>;
+def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)$")>;
+def : InstRW<[FXU], (instregex "VLR(32|64)?$")>;
+
+// Store
+def : InstRW<[FXU_4cyc], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[FXU_4cyc], (instregex "VSTE(F|G)$")>;
+def : InstRW<[VFU_Bcyc, FXU, BeginGroup], (instregex "VSTE(B|H)$")>;
+
+// Load / store multiple
+def : InstRW<[LSU_6cyc, LSU, LSU, LSU, LSU, GroupAlone],
+              (instregex "VLM$")>;
+def : InstRW<[LSU, LSU, FXU_8cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "VSTM$")>;
+
+// Byte instructions
+def : InstRW<[VFU_Bplus1cyc], (instregex "VSLB$")>;
+def : InstRW<[VFU_Bplus1cyc], (instregex "VSRAB$")>;
+def : InstRW<[VFU_Bplus1cyc], (instregex "VSRLB$")>;
+
+// Gather / scatter
+def : InstRW<[FXU, VFU_Bcyc, BeginGroup], (instregex "VGE(F|G)$")>;
+def : InstRW<[FXU_5cyc, FXU, BeginGroup], (instregex "VSCE(F|G)$")>;
+
+///// INLINE ASSEMBLY
+
+def : InstRW<[LSU, LSU, FXU_2cyc, FXU, FXU, BeginGroup], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, FXU_3cyc, FXU, FXU, FXU, BeginGroup],
+             (instregex "STCKE$")>;
+def : InstRW<[FXU], (instregex "STFLE$")>;
+
+///// OTHER
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXU], (instregex "ETND$")>;
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXU_5cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[FXU, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+// Prefetch data
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+
+// Extract access register
+def : InstRW<[LSU], (instregex "EAR$")>;
+
+// Insert Program Mask
+def : InstRW<[FXU_3cyc, EndGroup], (instregex "IPM$")>;
+
+// Processor assist
+def : InstRW<[FXU], (instregex "PPA$")>;
+
+}
+
Index: lib/Target/SystemZ/SystemZScheduleZ196.td
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -0,0 +1,520 @@
+//=- SystemZScheduleZ196.td - Z196 Scheduling Definitions ------*- tblgen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z196 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z196Model : SchedMachineModel {
+    // Machine-wide scheduling parameters for the Z196 model.
+    let IssueWidth = 3;             // 3 instructions decoded per cycle.
+    let MicroOpBufferSize = 40;     // Issue queues
+    let MinLatency = 0;             // Out-of-order. TODO(review): confirm field.
+    let LoadLatency = 1;            // Optimistic load latency.
+
+    let PostRAScheduler = 1;        // Ask for post-RA scheduling on this CPU.
+
+    // Extra cycles for a mispredicted branch.
+    let MispredictPenalty  = 8;
+
+    // Max micro-ops that can be buffered for
+    // optimized loop dispatch/execution.
+    let LoopMicroOpBufferSize = 12;
+
+    // This model does not include operand specific information.
+    let CompleteModel = 0;
+}
+
+let SchedModel = Z196Model in  {
+
+// Execution units. BufferSize controls when scheduler will start to
+// postpone scheduling of instructions using that particular unit.
+def Z196_FXUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+def Z196_LSUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+def Z196_FPUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+
+def : WriteRes<BeginGroup, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+}
+
+def : WriteRes<EndGroup, []> {
+  let NumMicroOps = 0;
+  let EndGroup    = 1;
+}
+
+// Subtarget specific definitions of scheduling resources.
+
+def : WriteRes<FXU,       [Z196_FXUnit]> { let Latency = 1; }
+def : WriteRes<FXU_2cyc,  [Z196_FXUnit]> { let Latency = 2; }
+def : WriteRes<FXU_3cyc,  [Z196_FXUnit]> { let Latency = 3; }
+def : WriteRes<FXU_4cyc,  [Z196_FXUnit]> { let Latency = 4; }
+def : WriteRes<FXU_5cyc,  [Z196_FXUnit]> { let Latency = 5; }
+def : WriteRes<FXU_6cyc,  [Z196_FXUnit]> { let Latency = 6; }
+def : WriteRes<FXU_7cyc,  [Z196_FXUnit]> { let Latency = 7; }
+def : WriteRes<FXU_8cyc,  [Z196_FXUnit]> { let Latency = 8; }
+def : WriteRes<FXU_9cyc,  [Z196_FXUnit]> { let Latency = 9; }
+def : WriteRes<FXU_15cyc, [Z196_FXUnit]> { let Latency = 15; }
+def : WriteRes<FXU_20cyc, [Z196_FXUnit]> { let Latency = 20; }
+def : WriteRes<FXU_30cyc, [Z196_FXUnit]> { let Latency = 30; }
+
+def : WriteRes<LSU,       [Z196_LSUnit]> { let Latency = 1; }
+def : WriteRes<LSU_2cyc,  [Z196_LSUnit]> { let Latency = 2; }
+def : WriteRes<LSU_5cyc,  [Z196_LSUnit]> { let Latency = 5; }
+def : WriteRes<LSU_6cyc,  [Z196_LSUnit]> { let Latency = 6; }
+def : WriteRes<LSU_20cyc, [Z196_LSUnit]> { let Latency = 20; }
+def : WriteRes<LSU_30cyc, [Z196_LSUnit]> { let Latency = 30; }
+
+def : WriteRes<FPU_Bcyc,      [Z196_FPUnit]> { let Latency = 8; }  // B = 8
+def : WriteRes<FPU_Bplus1cyc, [Z196_FPUnit]> { let Latency = 9; }  // B + 1
+def : WriteRes<FPU_Bplus2cyc, [Z196_FPUnit]> { let Latency = 10; } // B + 2
+def : WriteRes<FPU_15cyc,  [Z196_FPUnit]> { let Latency = 15; }
+def : WriteRes<FPU_20cyc,  [Z196_FPUnit]> { let Latency = 20; }
+def : WriteRes<FPU_30cyc,  [Z196_FPUnit]> { let Latency = 30; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//  Call
+def : InstRW<[LSU, FXU_2cyc, FXU, GroupAlone], (instregex "BRAS$")>;
+def : InstRW<[FXU_2cyc, FXU, LSU, GroupAlone], (instregex "(Call)?BASR$")>;
+def : InstRW<[LSU, EndGroup], (instregex "CallBR$")>;
+def : InstRW<[LSU, FXU_2cyc, FXU, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[LSU, FXU_2cyc, FXU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[LSU, EndGroup], (instregex "Return$")>;
+
+// Serialize
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+///// FIXED POINT
+
+// Addition
+def : InstRW<[FXU], (instregex "A(Y|IH|SI)?$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AG(SI)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU], (instregex "AL(Y|FI|HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALG(HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "ALC(R)?$")>;
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "ALCG(R)?$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "AGF(R)?$")>;
+
+// Add halfword
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "AH(Y)?$")>;
+
+// Subtraction (the SLL / SRA / SRL entries below are shifts)
+def : InstRW<[FXU], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SL(G|Y|FI)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "SLB(G|R)?$")>;
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "SLBGR$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "SGF(R)?$")>;
+
+// Subtract halfword
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "SH(Y)?$")>;
+
+// Multiply
+def : InstRW<[FXU_6cyc], (instregex "MS(R|Y|FI)?$")>;
+def : InstRW<[FXU_8cyc], (instregex "MSG(R)?$")>;
+def : InstRW<[FXU_6cyc], (instregex "MSGF(I|R)?$")>;
+def : InstRW<[FXU_9cyc, GroupAlone], (instregex "MLG(R)?$")>;
+def : InstRW<[FXU_5cyc], (instregex "MGHI$")>;
+def : InstRW<[FXU_5cyc], (instregex "MH(I|Y)?$")>;
+
+// Divide
+def : InstRW<[FPU_30cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU_30cyc, LSU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DSG(F)?$")>;
+def : InstRW<[FPU_30cyc, FXU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DL(G)?R$")>;
+def : InstRW<[FPU_30cyc, LSU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DL$")>;
+def : InstRW<[FPU_30cyc, LSU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DLG$")>;
+
+// And
+def : InstRW<[FXU], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+
+// Or
+def : InstRW<[FXU], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU], (instregex "OI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+
+// Xor
+def : InstRW<[FXU], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "X(G|Y|IFMux)?$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+
+// Insert
+def : InstRW<[FXU], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+// And / Or / Xor character
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "NC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "OC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "XC$")>;
+
+// Rotate
+def : InstRW<[FXU], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU_2cyc, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// Find leftmost one
+def : InstRW<[FXU_7cyc, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU_3cyc], (instregex "POPCNT$")>;
+
+// Compare
+def : InstRW<[FXU], (instregex "CG$")>;
+def : InstRW<[FXU], (instregex "C(Y|IH|Mux)?$")>; // CG: see previous entry.
+def : InstRW<[FXU], (instregex "CFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CGFI$")>;
+def : InstRW<[FXU], (instregex "CGH(I|SI)$")>;
+def : InstRW<[FXU], (instregex "CGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CH(I|F|SI)$")>;
+def : InstRW<[FXU], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I)?$")>;
+def : InstRW<[FXU], (instregex "CLGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLI(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR(L)?$")>;
+def : InstRW<[FXU], (instregex "CR(L)?$")>;
+
+// Compare halfword
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "CGF(R|RL)?$")>;
+
+// Compare and swap
+def : InstRW<[FXU, FXU, BeginGroup], (instregex "CS(G|Y)?$")>;
+
+// Compare logical character
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "CLC$")>;
+
+// Test under mask
+def : InstRW<[FXU], (instregex "TM(Y|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+// Load and test
+def : InstRW<[FXU], (instregex "LT(R)?$")>;
+def : InstRW<[FXU], (instregex "LTG(R)?$")>;
+def : InstRW<[FXU], (instregex "LTGF(R)?$")>;
+
+// Moves
+def : InstRW<[FXU], (instregex "MVGHI$")>;
+def : InstRW<[FXU], (instregex "MVH(I|HI)$")>;
+def : InstRW<[FXU], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU_2cyc, LSU, FXU, BeginGroup], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads (LSU)
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLG(C|F|H|FRL|HRL)$")>;
+def : InstRW<[LSU], (instregex "LLH(RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "L(X|128)$")>;
+
+// Loads (FXU)
+def : InstRW<[FXU], (instregex "LLCH$")>;
+def : InstRW<[FXU], (instregex "LLHH$")>;
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|F|H)R$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+def : InstRW<[FXU], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU], (instregex "LAX(G)?$")>;
+def : InstRW<[FXU], (instregex "LB(H|R|Mux)?$")>;
+def : InstRW<[FXU], (instregex "LGR$")>;
+def : InstRW<[FXU], (instregex "LGB(R)?$")>;
+def : InstRW<[FXU], (instregex "LGF(I)?$")>;
+def : InstRW<[FXU], (instregex "LGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "LGH(I)?$")>;
+def : InstRW<[FXU], (instregex "LGHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LH(H|I|Y|Mux|IMux)?$")>;
+def : InstRW<[FXU], (instregex "LHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LRV(R)?$")>;
+def : InstRW<[FXU], (instregex "LRVG(R)?$")>;
+
+// Load GR from FPR
+def : InstRW<[FXU_3cyc], (instregex "LGDR$")>;
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU_5cyc, LSU, LSU, LSU, LSU, BeginGroup], (instregex "LMG$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU_2cyc], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LCGFR$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU_2cyc], (instregex "LP(G)?R$")>;
+
+// Load on condition
+def : InstRW<[FXU_2cyc, EndGroup], (instregex "LOC(R)?$")>;
+def : InstRW<[FXU_2cyc, EndGroup], (instregex "LOCG(R)?$")>;
+
+// Stores
+def : InstRW<[FXU], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU], (instregex "ST(X|128)$")>;
+def : InstRW<[FXU], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "ST(Y|FH|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STRV(G)?$")>;
+
+// Store on condition / CondStore pseudos
+def : InstRW<[FXU, EndGroup], (instregex "STOC(G)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+// Store multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, FXU_5cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "STMG$")>;
+
+// Select pseudo 
+def : InstRW<[FXU], (instregex "Select(32|64|F32|F64|F128|32Mux)$")>;
+
+// String instructions
+def : InstRW<[FXU_30cyc], (instregex "SRST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "MVST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "CLST$")>;
+
+///// FLOATING POINT
+
+// Addition
+def : InstRW<[FPU_Bcyc], (instregex "AEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "ADB(R)?$")>;
+def : InstRW<[FPU_20cyc, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU_Bcyc], (instregex "SEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "SDB(R)?$")>;
+def : InstRW<[FPU_20cyc, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU_Bcyc], (instregex "MEEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "MDB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "MDEB(R)?$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "MXDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "MAEB(R)?$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "MSEB(R)?$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "M(A|S)DBR$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "M(A|S)DB$")>;
+
+// Division
+def : InstRW<[FPU_30cyc], (instregex "DEB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "DDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "DXBR$")>;
+
+// Square root
+def : InstRW<[FPU_30cyc], (instregex "SQEB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "SQDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "SQXBR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CE(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CD(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bplus2cyc, GroupAlone], (instregex "CX(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bplus2cyc, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU_Bplus1cyc, GroupAlone], (instregex "CF(E|D|X)BR$")>;
+def : InstRW<[FXU, FPU_Bplus1cyc, GroupAlone], (instregex "CG(E|D|X)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU_Bplus1cyc, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU_3cyc, BeginGroup], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU_3cyc, BeginGroup], (instregex "CPSDRs(d|s)$")>;
+
+// Compare
+def : InstRW<[FPU_Bcyc], (instregex "CEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "CDB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "CXBR$")>;
+
+// Load and Test
+def : InstRW<[FPU_Bcyc], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU_Bcyc], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone],
+             (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Load
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, GroupAlone], (instregex "LXR$")>;
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU_2cyc, FXU, GroupAlone], (instregex "LZXR$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU_Bcyc], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU_Bcyc], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Load lengthened
+def : InstRW<[FPU_Bcyc], (instregex "LDEB(R)?$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Load rounded
+def : InstRW<[FPU_Bcyc], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU_Bplus2cyc], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU_Bplus2cyc], (instregex "LDXBR(A)?$")>;
+
+// Load FP integer
+def : InstRW<[FPU_Bcyc], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU_15cyc, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+// Store
+def : InstRW<[FXU_3cyc], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU_3cyc], (instregex "STE(Y)?$")>;
+
+///// INLINE ASSEMBLY
+
+def : InstRW<[FXU_8cyc],  (instregex "STCKF$")>;
+def : InstRW<[FXU_15cyc], (instregex "STCK$")>;
+def : InstRW<[FXU_20cyc], (instregex "STCKE$")>;
+def : InstRW<[FXU], (instregex "STFLE$")>;
+
+///// OTHER
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+// Prefetch data
+def : InstRW<[LSU, GroupAlone], (instregex "PFD(RL)?$")>;
+
+// Extract access register
+def : InstRW<[LSU], (instregex "EAR$")>;
+
+// Insert Program Mask
+def : InstRW<[FXU_3cyc, EndGroup], (instregex "IPM$")>;
+
+}
+
Index: lib/Target/SystemZ/SystemZScheduleZEC12.td
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -0,0 +1,535 @@
+//=- SystemZScheduleZEC12.td - ZEC12 Scheduling Definitions ----*- tblgen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ZEC12 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def ZEC12Model : SchedMachineModel {
+    // Machine-wide scheduling parameters for the ZEC12 model.
+    let IssueWidth = 3;             // 3 instructions decoded per cycle.
+    let MicroOpBufferSize = 40;     // Issue queues
+    let MinLatency = 0;             // Out-of-order. TODO(review): confirm field.
+    let LoadLatency = 1;            // Optimistic load latency.
+
+    let PostRAScheduler = 1;        // Ask for post-RA scheduling on this CPU.
+
+    // Extra cycles for a mispredicted branch.
+    let MispredictPenalty  = 8;
+
+    // Max micro-ops that can be buffered for
+    // optimized loop dispatch/execution.
+    let LoopMicroOpBufferSize = 12;
+
+    // This model does not include operand specific information.
+    let CompleteModel = 0;
+}
+
+let SchedModel = ZEC12Model in  {
+
+// Execution units. BufferSize controls when scheduler will start to
+// postpone scheduling of instructions using that particular unit.
+def ZEC12_VBUnit : ProcResource<1>;
+def ZEC12_FXUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+def ZEC12_LSUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+def ZEC12_FPUnit : ProcResource<1> { let BufferSize = 2; /* ooo */ }
+
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+
+def : WriteRes<BeginGroup, []> {
+  let NumMicroOps = 0;
+  let BeginGroup  = 1;
+}
+
+def : WriteRes<EndGroup, []> {
+  let NumMicroOps = 0;
+  let EndGroup    = 1;
+}
+
+// Subtarget specific definitions of scheduling resources.
+
+def : WriteRes<FXU,       [ZEC12_FXUnit]> { let Latency = 1; }
+def : WriteRes<FXU_2cyc,  [ZEC12_FXUnit]> { let Latency = 2; }
+def : WriteRes<FXU_3cyc,  [ZEC12_FXUnit]> { let Latency = 3; }
+def : WriteRes<FXU_4cyc,  [ZEC12_FXUnit]> { let Latency = 4; }
+def : WriteRes<FXU_5cyc,  [ZEC12_FXUnit]> { let Latency = 5; }
+def : WriteRes<FXU_6cyc,  [ZEC12_FXUnit]> { let Latency = 6; }
+def : WriteRes<FXU_7cyc,  [ZEC12_FXUnit]> { let Latency = 7; }
+def : WriteRes<FXU_8cyc,  [ZEC12_FXUnit]> { let Latency = 8; }
+def : WriteRes<FXU_9cyc,  [ZEC12_FXUnit]> { let Latency = 9; }
+def : WriteRes<FXU_15cyc, [ZEC12_FXUnit]> { let Latency = 15; }
+def : WriteRes<FXU_20cyc, [ZEC12_FXUnit]> { let Latency = 20; }
+def : WriteRes<FXU_30cyc, [ZEC12_FXUnit]> { let Latency = 30; }
+
+def : WriteRes<LSU,       [ZEC12_LSUnit]> { let Latency = 1; }
+def : WriteRes<LSU_2cyc,  [ZEC12_LSUnit]> { let Latency = 2; }
+def : WriteRes<LSU_5cyc,  [ZEC12_LSUnit]> { let Latency = 5; }
+def : WriteRes<LSU_6cyc,  [ZEC12_LSUnit]> { let Latency = 6; }
+def : WriteRes<LSU_20cyc, [ZEC12_LSUnit]> { let Latency = 20; }
+def : WriteRes<LSU_30cyc, [ZEC12_LSUnit]> { let Latency = 30; }
+
+def : WriteRes<FPU_Bcyc,      [ZEC12_FPUnit]> { let Latency = 8; }
+def : WriteRes<FPU_Bplus1cyc, [ZEC12_FPUnit]> { let Latency = 9; }
+def : WriteRes<FPU_Bplus2cyc, [ZEC12_FPUnit]> { let Latency = 10; }
+def : WriteRes<FPU_15cyc,  [ZEC12_FPUnit]> { let Latency = 15; }
+def : WriteRes<FPU_20cyc,  [ZEC12_FPUnit]> { let Latency = 20; }
+def : WriteRes<FPU_30cyc,  [ZEC12_FPUnit]> { let Latency = 30; }
+
+def : WriteRes<VBU,  [ZEC12_VBUnit]>;
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//  Call
+def : InstRW<[VBU, FXU_2cyc, FXU, GroupAlone], (instregex "BRAS$")>;
+def : InstRW<[FXU_2cyc, FXU, LSU, GroupAlone], (instregex "(Call)?BASR$")>;
+def : InstRW<[LSU, EndGroup], (instregex "CallBR$")>;
+def : InstRW<[LSU, FXU_2cyc, FXU, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[LSU, FXU_2cyc, FXU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+
+// Return
+def : InstRW<[LSU, EndGroup], (instregex "Return$")>;
+
+// Serialize
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+///// FIXED POINT
+
+// Addition
+def : InstRW<[FXU], (instregex "A(Y|IH|SI)?$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AG(SI)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU], (instregex "AL(Y|FI|HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALG(HSIK)?$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "ALC(R)?$")>;
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "ALCG(R)?$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "AGF(R)?$")>;
+
+// Add halfword
+def : InstRW<[FXU_2cyc], (instregex "AH(Y)?$")>;
+
+// Subtraction (the SLL / SRA / SRL entries below are shifts)
+def : InstRW<[FXU], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SL(G|Y|FI)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)?$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "SLB(G|R)?$")>;
+def : InstRW<[FXU_3cyc, GroupAlone], (instregex "SLBGR$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "SGF(R)?$")>;
+
+// Subtract halfword
+def : InstRW<[FXU_2cyc], (instregex "SH(Y)?$")>;
+
+// Multiply
+def : InstRW<[FXU_6cyc], (instregex "MS(R|Y|FI)?$")>;
+def : InstRW<[FXU_8cyc], (instregex "MSG(R)?$")>;
+def : InstRW<[FXU_6cyc], (instregex "MSGF(I|R)?$")>;
+def : InstRW<[FXU_9cyc, GroupAlone], (instregex "MLG(R)?$")>;
+def : InstRW<[FXU_5cyc], (instregex "MGHI$")>;
+def : InstRW<[FXU_5cyc], (instregex "MH(I|Y)?$")>;
+
+// Divide
+def : InstRW<[FPU_30cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU_30cyc, LSU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DSG(F)?$")>;
+def : InstRW<[FPU_15cyc, FXU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DLR$")>;
+def : InstRW<[FPU_30cyc, FXU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DLGR$")>;
+def : InstRW<[FPU_15cyc, LSU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DL$")>;
+def : InstRW<[FPU_30cyc, LSU, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "DLG$")>;
+
+// And
+def : InstRW<[FXU], (instregex "N(G|Y|TSTG)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+
+// Or
+def : InstRW<[FXU], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU], (instregex "OI(Y|FMux|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+
+// Xor
+def : InstRW<[FXU], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "X(G|Y|IFMux)?$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+
+// Insert
+def : InstRW<[FXU], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+// And / Or / Xor character
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "NC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "OC$")>;
+def : InstRW<[LSU, FXU, BeginGroup], (instregex "XC$")>;
+
+// Rotate
+def : InstRW<[FXU], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU_2cyc, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// Find leftmost one
+def : InstRW<[FXU_7cyc, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU_3cyc], (instregex "POPCNT$")>;
+
+// Compare
+def : InstRW<[FXU], (instregex "CG$")>;
+def : InstRW<[FXU], (instregex "C(Y|IH|Mux)?$")>; // CG: see previous entry.
+def : InstRW<[FXU], (instregex "CFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CGFI$")>;
+def : InstRW<[FXU], (instregex "CGH(I|SI)$")>;
+def : InstRW<[FXU], (instregex "CGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CH(I|F|SI)$")>;
+def : InstRW<[FXU], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I)?$")>;
+def : InstRW<[FXU], (instregex "CLGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLGR(L)?$")>;
+def : InstRW<[FXU], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLI(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR(L)?$")>;
+def : InstRW<[FXU], (instregex "CR(L)?$")>;
+
+// Compare halfword
+def : InstRW<[FXU_2cyc], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU_2cyc], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU_2cyc], (instregex "CGF(R|RL)?$")>;
+
+// Compare and swap
+def : InstRW<[FXU, FXU, BeginGroup], (instregex "CS(G|Y)?$")>;
+
+// Compare logical character
+def : InstRW<[FXU, LSU, BeginGroup], (instregex "CLC$")>;
+
+// Test under mask
+def : InstRW<[FXU], (instregex "TM(Y|HMux|LMux)?$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+// Load and test
+def : InstRW<[FXU], (instregex "LT(R)?$")>;
+def : InstRW<[FXU], (instregex "LTG(R)?$")>;
+def : InstRW<[FXU], (instregex "LTGF(R)?$")>;
+
+// Moves
+def : InstRW<[FXU], (instregex "MVGHI$")>;
+def : InstRW<[FXU], (instregex "MVH(I|HI)$")>;
+def : InstRW<[FXU], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU_2cyc, LSU, FXU, BeginGroup], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads (LSU)
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLG(C|F|H|FRL|HRL)$")>;
+def : InstRW<[LSU], (instregex "LLH(RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "L(X|128)$")>;
+
+// Loads (FXU)
+def : InstRW<[FXU], (instregex "LLCH$")>;
+def : InstRW<[FXU], (instregex "LLHH$")>;
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|F|H)R$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+def : InstRW<[FXU], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU], (instregex "LAX(G)?$")>;
+def : InstRW<[FXU], (instregex "LB(H|R|Mux)?$")>;
+def : InstRW<[FXU], (instregex "LGR$")>;
+def : InstRW<[FXU], (instregex "LGB(R)?$")>;
+def : InstRW<[FXU], (instregex "LGF(I)?$")>;
+def : InstRW<[FXU], (instregex "LGFR(L)?$")>;
+def : InstRW<[FXU], (instregex "LGH(I)?$")>;
+def : InstRW<[FXU], (instregex "LGHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LH(H|I|Y|Mux|IMux)?$")>;
+def : InstRW<[FXU], (instregex "LHR(L)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LRV(R)?$")>;
+def : InstRW<[FXU], (instregex "LRVG(R)?$")>;
+
+// Load GR from FPR
+def : InstRW<[FXU_3cyc], (instregex "LGDR$")>;
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU_5cyc, LSU, LSU, LSU, LSU, GroupAlone], (instregex "LMG$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU_2cyc], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LCGFR$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU_2cyc], (instregex "LP(G)?R$")>;
+
+// Load on condition
+def : InstRW<[FXU_2cyc, EndGroup], (instregex "LOC(R)?$")>;
+def : InstRW<[FXU_2cyc, EndGroup], (instregex "LOCG(R)?$")>;
+
+// Stores
+def : InstRW<[FXU], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU], (instregex "ST(X|128)$")>;
+def : InstRW<[FXU], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "ST(Y|FH|RL|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU], (instregex "STRV(G)?$")>;
+
+// Store on condition / CondStore pseudos
+def : InstRW<[FXU, EndGroup], (instregex "STOC(G)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+// Store multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, FXU_5cyc, FXU, FXU, FXU, FXU,
+              GroupAlone], (instregex "STMG$")>;
+
+// Select pseudo 
+def : InstRW<[FXU], (instregex "Select(32|64|F32|F64|F128|32Mux)$")>;
+
+// String instructions
+def : InstRW<[FXU_30cyc], (instregex "SRST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "MVST$")>;
+def : InstRW<[LSU_30cyc, GroupAlone], (instregex "CLST$")>;
+
+///// FLOATING POINT
+
+// Addition
+def : InstRW<[FPU_Bcyc], (instregex "AEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "ADB(R)?$")>;
+def : InstRW<[FPU_20cyc, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU_Bcyc], (instregex "SEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "SDB(R)?$")>;
+def : InstRW<[FPU_20cyc, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU_Bcyc], (instregex "MEEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "MDB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "MDEB(R)?$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "MXDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "MAEB(R)?$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "MSEB(R)?$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "M(A|S)DBR$")>;
+def : InstRW<[FPU_Bcyc, GroupAlone], (instregex "M(A|S)DB$")>;
+
+// Division
+def : InstRW<[FPU_30cyc], (instregex "DEB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "DDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "DXBR$")>;
+
+// Square root
+def : InstRW<[FPU_30cyc], (instregex "SQEB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "SQDB(R)?$")>;
+def : InstRW<[FPU_30cyc, GroupAlone], (instregex "SQXBR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CE(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CD(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bplus2cyc, GroupAlone], (instregex "CX(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU_Bplus2cyc, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU_Bplus1cyc, GroupAlone], (instregex "CF(E|D|X)BR$")>;
+def : InstRW<[FXU, FPU_Bplus1cyc, GroupAlone], (instregex "CG(E|D|X)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, BeginGroup], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU_Bcyc, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU_Bplus1cyc, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU_3cyc, BeginGroup], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU_3cyc, BeginGroup], (instregex "CPSDRs(d|s)$")>;
+
+// Compare
+def : InstRW<[FPU_Bcyc], (instregex "CEB(R)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "CDB(R)?$")>;
+def : InstRW<[FPU_30cyc], (instregex "CXBR$")>;
+
+// Load and Test
+def : InstRW<[FPU_Bcyc], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU_Bcyc], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone],
+             (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Load
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|GR)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LXR$")>;
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU_2cyc, FXU, BeginGroup], (instregex "LZXR$")>;
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU_Bcyc], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU_Bcyc], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU_Bplus1cyc, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Load lengthened
+def : InstRW<[FPU_Bcyc], (instregex "LDEB(R)?$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU_Bplus2cyc, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Load rounded
+def : InstRW<[FPU_Bcyc], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU_Bplus2cyc], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU_Bplus2cyc], (instregex "LDXBR(A)?$")>;
+
+// Load FP integer
+def : InstRW<[FPU_Bcyc], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU_Bcyc], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU_15cyc, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+// Store
+def : InstRW<[FXU_3cyc], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU_3cyc], (instregex "STE(Y)?$")>;
+
+///// INLINE ASSEMBLY
+
+def : InstRW<[FXU, LSU, BeginGroup], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, FXU_2cyc, FXU, BeginGroup], (instregex "STCKE$")>;
+def : InstRW<[FXU], (instregex "STFLE$")>;
+
+///// OTHER
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXU_5cyc, FXU, FXU, FXU, FXU, GroupAlone],
+              (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[LSU, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+// Prefetch data
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+
+// Extract access register
+def : InstRW<[LSU], (instregex "EAR$")>;
+
+// Insert Program Mask
+def : InstRW<[FXU_3cyc, EndGroup], (instregex "IPM$")>;
+
+}
+
Index: lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- lib/Target/SystemZ/SystemZSubtarget.h
+++ lib/Target/SystemZ/SystemZSubtarget.h
@@ -73,6 +73,25 @@
     return &TSInfo;
   }
 
+  bool isZ10() const { return getCPU().equals("z10"); }
+
+  bool enableMachineScheduler() const override {
+    // Disable the MachineScheduler for z10 (failing test-case: fp-move-02.ll)
+    return (!isZ10());
+  }
+
+  // Returning true here (the default) makes the DAG scheduler schedule for
+  // source order when enableMachineScheduler() also returns true.
+  bool enableMachineSchedDefaultSched() const override;
+
+  bool keepSched_hasManyFoldable(MachineBasicBlock::iterator Begin,
+                                 MachineBasicBlock::iterator End) const;
+
+  void overrideSchedPolicy(MachineSchedPolicy &Policy,
+                           MachineBasicBlock::iterator Begin,
+                           MachineBasicBlock::iterator End,
+                           unsigned NumRegionInstrs) const override;
+
   // This is important for reducing register pressure in vector code.
   bool useAA() const override { return true; }
 
Index: lib/Target/SystemZ/SystemZSubtarget.cpp
===================================================================
--- lib/Target/SystemZ/SystemZSubtarget.cpp
+++ lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,9 +10,20 @@
 #include "SystemZSubtarget.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 
 using namespace llvm;
 
+extern cl::opt<std::string> SchedPref;
+
+static cl::opt<bool>
+FoldableReloadHeuristic("foldable-reloads", cl::Hidden,
+            cl::desc("Consider reg->memory opcodes during mischeduling"), 
+            cl::init(true));
+
 #define DEBUG_TYPE "systemz-subtarget"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
@@ -70,3 +81,25 @@
   // case isn't easy to detect.
   return false;
 }
+
+// Returns true only when the externally defined SchedPref option is set to
+// "source".
+bool SystemZSubtarget::enableMachineSchedDefaultSched() const {
+  return (SchedPref=="source");
+}
+
+// Adjusts the MachineScheduler policy for SystemZ. Begin, End and
+// NumRegionInstrs are not used.
+void SystemZSubtarget::
+overrideSchedPolicy(MachineSchedPolicy &Policy,
+                    MachineBasicBlock::iterator Begin,
+                    MachineBasicBlock::iterator End,
+                    unsigned NumRegionInstrs) const
+{
+  // Bidirectional scheduling pre-ra is beneficial according to benchmarks.
+  Policy.OnlyTopDown = false;
+  Policy.OnlyBottomUp = false;
+  // Enable heuristic for foldable reloads, i.e. prefer to spill a
+  // register if it is read by an instruction that can fold the reload.
+  Policy.FoldableReloadHeuristic = FoldableReloadHeuristic;
+}
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -9,10 +9,12 @@
 
 #include "SystemZTargetMachine.h"
 #include "SystemZTargetTransformInfo.h"
+#include "SystemZMachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
@@ -104,6 +106,12 @@
     return getTM<SystemZTargetMachine>();
   }
 
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+                             /*RemoveKillFlags=*/true);
+  }
+
   void addIRPasses() override;
   bool addInstSelector() override;
   void addPreSched2() override;
@@ -168,12 +176,8 @@
   // Do final scheduling after all other optimizations, to get an
   // optimal input for the decoder (branch relaxation must happen
   // after block placement).
-  if (getOptLevel() != CodeGenOpt::None) {
-    if (MISchedPostRA)
-      addPass(&PostMachineSchedulerID);
-    else
-      addPass(&PostRASchedulerID);
-  }
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&PostMachineSchedulerID);
 }
 
 TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
Index: test/CodeGen/SystemZ/alias-01.ll
===================================================================
--- test/CodeGen/SystemZ/alias-01.ll
+++ test/CodeGen/SystemZ/alias-01.ll
@@ -1,4 +1,5 @@
-; Test 32-bit ANDs in which the second operand is variable.
+; Check that spilling is not needed with 16 independent
+; load-add-stores.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
Index: test/CodeGen/SystemZ/alloca-01.ll
===================================================================
--- test/CodeGen/SystemZ/alloca-01.ll
+++ test/CodeGen/SystemZ/alloca-01.ll
@@ -29,12 +29,14 @@
 ; CHECK: lgr %r15, [[REG2]]
 ;
 ; CHECK-A-LABEL: f1:
-; CHECK-A: lgr %r15, %r1
-; CHECK-A: la %r2, 176(%r1)
+; CHECK-A-DAG: lgr %r15, %r1
+; CHECK-A-DAG: la %r2, 176(%r1)
+; CHECK-A: br %r14
 ;
 ; CHECK-B-LABEL: f1:
 ; CHECK-B: lgr %r15, %r1
-; CHECK-B: la %r3, 177(%r1)
+; CHECK-B: la %r0, 177(%r1)
+; CHECK-B: lgr %r3, %r0
 ;
 ; CHECK-C-LABEL: f1:
 ; CHECK-C: lgr %r15, %r1
Index: test/CodeGen/SystemZ/alloca-02.ll
===================================================================
--- test/CodeGen/SystemZ/alloca-02.ll
+++ test/CodeGen/SystemZ/alloca-02.ll
@@ -10,24 +10,24 @@
 
 define i64 @f1(i64 %length, i64 %index) {
 ; CHECK-A-LABEL: f1:
-; CHECK-A: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-A: la %r2, 160([[ADDR]])
+; CHECK-A-DAG: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-A-DAG: la %r2, 160([[ADDR]])
 ; CHECK-A: mvi 0(%r2), 0
 ;
 ; CHECK-B-LABEL: f1:
-; CHECK-B: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-B: la %r2, 160([[ADDR]])
+; CHECK-B-DAG: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-B-DAG: la %r2, 160([[ADDR]])
 ; CHECK-B: mvi 4095(%r2), 1
 ;
 ; CHECK-C-LABEL: f1:
-; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-C-DAG: la %r2, 160([[ADDR]])
+; CHECK-C-DAG: la %r2, 160([[ADDR:%r[1-5]]])
+; CHECK-C-DAG: lgr %r15, [[ADDR]]
 ; CHECK-C-DAG: lhi [[TMP:%r[0-5]]], 2
 ; CHECK-C: stc [[TMP]], 0({{%r3,%r2|%r2,%r3}})
 ;
 ; CHECK-D-LABEL: f1:
-; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-D-DAG: la %r2, 160([[ADDR]])
+; CHECK-D-DAG: la %r2, 160([[ADDR:%r[1-5]]])
+; CHECK-D-DAG: lgr %r15, [[ADDR]]
 ; CHECK-D-DAG: lhi [[TMP:%r[0-5]]], 3
 ; CHECK-D: stc [[TMP]], 4095({{%r3,%r2|%r2,%r3}})
 ;
Index: test/CodeGen/SystemZ/alloca-03.ll
===================================================================
--- test/CodeGen/SystemZ/alloca-03.ll
+++ test/CodeGen/SystemZ/alloca-03.ll
@@ -15,13 +15,14 @@
 ; Allocate %len * 8, no need to align stack.
 define void @f1(i64 %len) {
 ; CHECK-LABEL: f1:
-; CHECK: sllg    %r0, %r2, 3
-; CHECK: lgr     %r1, %r15
+; CHECK-DAG: sllg    %r0, %r2, 3
+; CHECK-DAG: lgr     %r1, %r15
 ; CHECK: sgr     %r1, %r0
 ; CHECK-NOT: ngr
-; CHECK: lgr     %r15, %r1
-; CHECK: la      %r1, 160(%r1)
-; CHECK: mvghi   0(%r1), 10
+; CHECK-DAG: lgr     %r15, %r1
+; CHECK-DAG: la      [[ADDR:%r[1-2]]], 160(%r1)
+; CHECK-DAG: mvghi   0([[ADDR]]), 10
+; CHECK: br %r14
   %x = alloca i64, i64 %len
   store volatile i64 10, i64* %x
   ret void
@@ -31,10 +32,11 @@
 define void @f2() {
 ; CHECK-LABEL: f2:
 ; CHECK: aghi    %r1, -128
-; CHECK: lgr     %r15, %r1
-; CHECK: la      %r1, 280(%r1)
-; CHECK: nill	 %r1, 65408
-; CHECK: mvghi   0(%r1), 10
+; CHECK-DAG: lgr     %r15, %r1
+; CHECK-DAG: la      [[ADDR:%r[1-2]]], 280(%r1)
+; CHECK-DAG: nill	 [[ADDR]], 65408
+; CHECK-DAG: mvghi   0([[ADDR]]), 10
+; CHECK: br %r14
   %x = alloca i64, i64 1, align 128
   store volatile i64 10, i64* %x, align 128
   ret void
@@ -43,14 +45,14 @@
 ; Dynamic alloca, align 128.
 define void @f3(i64 %len) {
 ; CHECK-LABEL: f3:
-; CHECK: sllg	%r1, %r2, 3
-; CHECK: la	%r0, 120(%r1)
-; CHECK: lgr	%r1, %r15
+; CHECK-DAG: sllg	[[ADDR:%r[1-2]]], %r2, 3
+; CHECK-DAG: la	%r0, 120([[ADDR]])
+; CHECK-DAG: lgr	%r1, %r15
 ; CHECK: sgr	%r1, %r0
-; CHECK: lgr	%r15, %r1
-; CHECK: la	%r1, 280(%r1)
-; CHECK: nill	%r1, 65408
-; CHECK: mvghi	0(%r1), 10
+; CHECK-DAG: lgr	%r15, %r1
+; CHECK-DAG: la	[[ADDR:%r[1-2]]], 280(%r1)
+; CHECK-DAG: nill	[[ADDR]], 65408
+; CHECK: mvghi	0([[ADDR]]), 10
   %x = alloca i64, i64 %len, align 128
   store volatile i64 10, i64* %x, align 128
   ret void
@@ -73,10 +75,10 @@
 
 ; CHECK: lgr	%r1, %r15
 ; CHECK: aghi	%r1, -128
-; CHECK: lgr	%r15, %r1
-; CHECK: la	%r1, 280(%r1)
-; CHECK: nill	%r1, 65408
-; CHECK: mvhi	0(%r1), 10
+; CHECK-DAG: lgr	%r15, %r1
+; CHECK-DAG: la	[[ADDR:%r[1-2]]], 280(%r1)
+; CHECK-DAG: nill	[[ADDR]], 65408
+; CHECK: mvhi	0([[ADDR]]), 10
   %x = alloca i32, i64 1, align 128
   store volatile i32 10, i32* %x
   ret void
Index: test/CodeGen/SystemZ/args-01.ll
===================================================================
--- test/CodeGen/SystemZ/args-01.ll
+++ test/CodeGen/SystemZ/args-01.ll
@@ -40,16 +40,16 @@
 ;
 ; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
-; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1-DAG: std %f0, 224(%r15)
-; CHECK-FP128-1-DAG: std %f2, 232(%r15)
+; CHECK-FP128-1: lzxr [[REG:%f[01]]]
+; CHECK-FP128-1-DAG: std [[REG]], 224(%r15)
+; CHECK-FP128-1-DAG: std %f{{2|3}}, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
-; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2-DAG: std %f0, 240(%r15)
-; CHECK-FP128-2-DAG: std %f2, 248(%r15)
+; CHECK-FP128-2: lzxr [[REG:%f[01]]]
+; CHECK-FP128-2-DAG: std [[REG]], 240(%r15)
+; CHECK-FP128-2-DAG: std %f{{2|3}}, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
 ; CHECK-STACK-LABEL: foo:
Index: test/CodeGen/SystemZ/args-02.ll
===================================================================
--- test/CodeGen/SystemZ/args-02.ll
+++ test/CodeGen/SystemZ/args-02.ll
@@ -41,16 +41,16 @@
 ;
 ; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
-; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1-DAG: std %f0, 224(%r15)
-; CHECK-FP128-1-DAG: std %f2, 232(%r15)
+; CHECK-FP128-1: lzxr [[REG:%f[01]]]
+; CHECK-FP128-1-DAG: std [[REG]], 224(%r15)
+; CHECK-FP128-1-DAG: std %f{{2|3}}, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
-; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2-DAG: std %f0, 240(%r15)
-; CHECK-FP128-2-DAG: std %f2, 248(%r15)
+; CHECK-FP128-2: lzxr [[REG:%f[01]]]
+; CHECK-FP128-2-DAG: std [[REG]], 240(%r15)
+; CHECK-FP128-2-DAG: std %f{{2|3}}, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
 ; CHECK-STACK-LABEL: foo:
Index: test/CodeGen/SystemZ/args-03.ll
===================================================================
--- test/CodeGen/SystemZ/args-03.ll
+++ test/CodeGen/SystemZ/args-03.ll
@@ -41,25 +41,25 @@
 ;
 ; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
-; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1-DAG: std %f0, 224(%r15)
-; CHECK-FP128-1-DAG: std %f2, 232(%r15)
+; CHECK-FP128-1: lzxr [[REG:%f[01]]]
+; CHECK-FP128-1-DAG: std [[REG]], 224(%r15)
+; CHECK-FP128-1-DAG: std %f{{2|3}}, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
-; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2-DAG: std %f0, 240(%r15)
-; CHECK-FP128-2-DAG: std %f2, 248(%r15)
+; CHECK-FP128-2: lzxr [[REG:%f[01]]]
+; CHECK-FP128-2-DAG: std [[REG]], 240(%r15)
+; CHECK-FP128-2-DAG: std %f{{2|3}}, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
 ; CHECK-STACK-LABEL: foo:
-; CHECK-STACK: aghi %r15, -256
-; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
-; CHECK-STACK: stg [[REGISTER]], 216(%r15)
-; CHECK-STACK: llilf [[AT184:%r[0-5]+]], 4294967288
-; CHECK-STACK: stg [[AT184]], 184(%r15)
-; CHECK-STACK: llill [[AT176:%r[0-5]+]], 65529
+; CHECK-STACK-DAG: aghi %r15, -256
+; CHECK-STACK-DAG: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
+; CHECK-STACK-DAG: stg [[REGISTER]], 216(%r15)
+; CHECK-STACK-DAG: llilf [[AT184:%r[0-5]+]], 4294967288
+; CHECK-STACK-DAG: stg [[AT184]], 184(%r15)
+; CHECK-STACK-DAG: llill [[AT176:%r[0-5]+]], 65529
 ; CHECK-STACK: stg [[AT176]], 176(%r15)
 ; CHECK-STACK: mvghi 208(%r15), 0
 ; CHECK-STACK: mvhi 204(%r15), 0
Index: test/CodeGen/SystemZ/args-06.ll
===================================================================
--- test/CodeGen/SystemZ/args-06.ll
+++ test/CodeGen/SystemZ/args-06.ll
@@ -5,12 +5,13 @@
 
 define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) {
 ; CHECK-LABEL: f1:
-; CHECK: ar %r2, %r3
-; CHECK: ar %r2, %r4
-; CHECK: ar %r2, %r5
-; CHECK: ar %r2, %r6
-; CHECK: lb {{%r[0-5]}}, 167(%r15)
-; CHECK: lb {{%r[0-5]}}, 175(%r15)
+; CHECK-DAG: lb [[REG0:%r[0-5]]], 167(%r15)
+; CHECK-DAG: lb [[REG1:%r[0-5]]], 175(%r15)
+; CHECK-DAG: ar %r2, [[REG0]]
+; CHECK-DAG: ar %r2, [[REG1]]
+; CHECK-DAG: ar %r2, %r4
+; CHECK-DAG: ar %r2, %r5
+; CHECK-DAG: ar %r2, %r6
 ; CHECK: br %r14
   %addb = add i8 %a, %b
   %addc = add i8 %addb, %c
Index: test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
===================================================================
--- test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
+++ test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
@@ -158,8 +158,8 @@
 ; Check that constants are handled.
 define i32 @f13(i32 %dummy, i32 *%ptr) {
 ; CHECK-LABEL: f13:
-; CHECK: lhi [[LIMIT:%r[0-9]+]], 42
-; CHECK: l %r2, 0(%r3)
+; CHECK-DAG: lhi [[LIMIT:%r[0-9]+]], 42
+; CHECK-DAG: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
 ; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]]
Index: test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
===================================================================
--- test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
+++ test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
@@ -125,8 +125,8 @@
 ; Check that constants are handled.
 define i64 @f10(i64 %dummy, i64 *%ptr) {
 ; CHECK-LABEL: f10:
-; CHECK: lghi [[LIMIT:%r[0-9]+]], 42
-; CHECK: lg %r2, 0(%r3)
+; CHECK-DAG: lghi [[LIMIT:%r[0-9]+]], 42
+; CHECK-DAG: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
 ; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]]
Index: test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
===================================================================
--- test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
+++ test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
@@ -110,8 +110,8 @@
 ; use the sequence above.
 define i32 @f10(i32 %dummy, i32 *%src) {
 ; CHECK-LABEL: f10:
-; CHECK: llill [[VALUE:%r[0-9+]]], 40000
-; CHECK: l %r2, 0(%r3)
+; CHECK-DAG: llill [[VALUE:%r[0-9+]]], 40000
+; CHECK-DAG: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: cs %r2, [[VALUE]], 0(%r3)
 ; CHECK: jl [[LABEL]]
Index: test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
===================================================================
--- test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
+++ test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
@@ -77,8 +77,8 @@
 ; use the sequence above.
 define i64 @f7(i64 %dummy, i64 *%ptr) {
 ; CHECK-LABEL: f7:
-; CHECK: llilf [[VALUE:%r[0-9+]]], 3000000000
-; CHECK: lg %r2, 0(%r3)
+; CHECK-DAG: llilf [[VALUE:%r[0-9+]]], 3000000000
+; CHECK-DAG: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: csg %r2, [[VALUE]], 0(%r3)
 ; CHECK: jl [[LABEL]]
Index: test/CodeGen/SystemZ/branch-05.ll
===================================================================
--- test/CodeGen/SystemZ/branch-05.ll
+++ test/CodeGen/SystemZ/branch-05.ll
@@ -6,9 +6,9 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ahi %r4, -1
 ; CHECK: clijh %r4, 5,
-; CHECK: llgfr [[OP64:%r[0-5]]], %r4
-; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
-; CHECK: larl [[BASE:%r[1-5]]]
+; CHECK-DAG: llgfr [[OP64:%r[0-5]]], %r4
+; CHECK-DAG: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
+; CHECK-DAG: larl [[BASE:%r[1-5]]]
 ; CHECK: lg [[TARGET:%r[1-5]]], 0([[BASE]],[[INDEX]])
 ; CHECK: br [[TARGET]]
 entry:
Index: test/CodeGen/SystemZ/call-03.ll
===================================================================
--- test/CodeGen/SystemZ/call-03.ll
+++ test/CodeGen/SystemZ/call-03.ll
@@ -64,7 +64,7 @@
 ; the target register is %r1.
 define void @f5(void(i32, i32, i32, i32) *%foo) {
 ; CHECK-LABEL: f5:
-; CHECK: lgr %r1, %r2
+; CHECK: lgr %r{{[0-1]}}, %r2
 ; CHECK-DAG: lhi %r2, 1
 ; CHECK-DAG: lhi %r3, 2
 ; CHECK-DAG: lhi %r4, 3
Index: test/CodeGen/SystemZ/fp-add-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-add-03.ll
+++ test/CodeGen/SystemZ/fp-add-03.ll
@@ -5,12 +5,12 @@
 ; There is no memory form of 128-bit addition.
 define void @f1(fp128 *%ptr, float %f2) {
 ; CHECK-LABEL: f1:
-; CHECK: lxebr %f0, %f0
-; CHECK: ld %f1, 0(%r2)
-; CHECK: ld %f3, 8(%r2)
-; CHECK: axbr %f1, %f0
-; CHECK: std %f1, 0(%r2)
-; CHECK: std %f3, 8(%r2)
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: axbr [[REGISTER:%f[0-1]+]], %f{{0|1}}
+; CHECK: std [[REGISTER]], 0(%r2)
+; CHECK: std %f{{2|3}}, 8(%r2)
 ; CHECK: br %r14
   %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
Index: test/CodeGen/SystemZ/fp-cmp-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-cmp-03.ll
+++ test/CodeGen/SystemZ/fp-cmp-03.ll
@@ -6,9 +6,9 @@
 ; There is no memory form of 128-bit comparison.
 define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) {
 ; CHECK-LABEL: f1:
-; CHECK: lxebr %f0, %f0
-; CHECK: ld %f1, 0(%r4)
-; CHECK: ld %f3, 8(%r4)
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r4)
+; CHECK-DAG: ld %f3, 8(%r4)
 ; CHECK: cxbr %f1, %f0
 ; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
Index: test/CodeGen/SystemZ/fp-cmp-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-cmp-04.ll
+++ test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -275,10 +275,10 @@
 define void @f14(fp128 *%ptr1, fp128 *%ptr2) {
 ; CHECK-LABEL: f14:
 ; CHECK: ltxbr
-; CHECK-NEXT: dxbr
-; CHECK-NEXT: std
-; CHECK-NEXT: std
-; CHECK-NEXT: mxbr
+; CHECK-DAG: dxbr
+; CHECK-DAG: mxbr
+; CHECK-DAG: std
+; CHECK-DAG: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: jl .L{{.*}}
Index: test/CodeGen/SystemZ/fp-div-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-div-03.ll
+++ test/CodeGen/SystemZ/fp-div-03.ll
@@ -5,9 +5,9 @@
 ; There is no memory form of 128-bit division.
 define void @f1(fp128 *%ptr, float %f2) {
 ; CHECK-LABEL: f1:
-; CHECK: lxebr %f0, %f0
-; CHECK: ld %f1, 0(%r2)
-; CHECK: ld %f3, 8(%r2)
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
 ; CHECK: dxbr %f1, %f0
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
Index: test/CodeGen/SystemZ/fp-mul-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-mul-04.ll
+++ test/CodeGen/SystemZ/fp-mul-04.ll
@@ -108,7 +108,7 @@
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: mxdb %f0, 160(%r15)
+; CHECK: mxdb %f{{0|1}}, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
Index: test/CodeGen/SystemZ/fp-mul-05.ll
===================================================================
--- test/CodeGen/SystemZ/fp-mul-05.ll
+++ test/CodeGen/SystemZ/fp-mul-05.ll
@@ -5,12 +5,12 @@
 ; There is no memory form of 128-bit multiplication.
 define void @f1(fp128 *%ptr, float %f2) {
 ; CHECK-LABEL: f1:
-; CHECK: lxebr %f0, %f0
-; CHECK: ld %f1, 0(%r2)
-; CHECK: ld %f3, 8(%r2)
-; CHECK: mxbr %f1, %f0
-; CHECK: std %f1, 0(%r2)
-; CHECK: std %f3, 8(%r2)
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: mxbr [[REG:%f[0-1]]], %f{{0|1}}
+; CHECK: std [[REG]], 0(%r2)
+; CHECK: std %f{{2|3}}, 8(%r2)
 ; CHECK: br %r14
   %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
Index: test/CodeGen/SystemZ/fp-sub-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-sub-03.ll
+++ test/CodeGen/SystemZ/fp-sub-03.ll
@@ -5,9 +5,9 @@
 ; There is no memory form of 128-bit subtraction.
 define void @f1(fp128 *%ptr, float %f2) {
 ; CHECK-LABEL: f1:
-; CHECK: lxebr %f0, %f0
-; CHECK: ld %f1, 0(%r2)
-; CHECK: ld %f3, 8(%r2)
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
 ; CHECK: sxbr %f1, %f0
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
Index: test/CodeGen/SystemZ/vec-args-06.ll
===================================================================
--- test/CodeGen/SystemZ/vec-args-06.ll
+++ test/CodeGen/SystemZ/vec-args-06.ll
@@ -42,29 +42,29 @@
 ; CHECK-LABEL: f2:
 ; CHECK: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 128(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 128(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 112(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 112(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 96(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 96(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 80(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 80(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 64(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 64(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 48(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 48(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 32(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 32(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-; CHECK: vst [[VTMP]], 16(%r2)
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK-DAG: vst [[VTMP]], 16(%r2)
+; CHECK-DAG: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
 ; CHECK: vst [[VTMP]], 0(%r2)
 ; CHECK: br %r14
Index: test/CodeGen/SystemZ/vec-perm-12.ll
===================================================================
--- test/CodeGen/SystemZ/vec-perm-12.ll
+++ test/CodeGen/SystemZ/vec-perm-12.ll
@@ -7,9 +7,9 @@
 
 define <4 x i32> @f1(<4 x i32> %x, i64 %y) {
 ; CHECK-CODE-LABEL: f1:
-; CHECK-CODE: vlvgf [[ELT:%v[0-9]+]], %r2, 0
-; CHECK-CODE: larl [[REG:%r[0-5]]],
-; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE-DAG: vlvgf [[ELT:%v[0-9]+]], %r2, 0
+; CHECK-CODE-DAG: larl [[REG:%r[0-5]]],
+; CHECK-CODE-DAG: vl [[MASK:%v[0-9]+]], 0([[REG]])
 ; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
 ; CHECK-CODE: br %r14
 
Index: test/CodeGen/SystemZ/vec-perm-13.ll
===================================================================
--- test/CodeGen/SystemZ/vec-perm-13.ll
+++ test/CodeGen/SystemZ/vec-perm-13.ll
@@ -7,9 +7,9 @@
 
 define <4 x i16> @f1(<4 x i16> %x) {
 ; CHECK-CODE-LABEL: f1:
-; CHECK-CODE: larl [[REG:%r[0-5]]],
-; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
-; CHECK-CODE: vgbm [[ELT:%v[0-9]+]], 0
+; CHECK-CODE-DAG: larl [[REG:%r[0-5]]],
+; CHECK-CODE-DAG: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE-DAG: vgbm [[ELT:%v[0-9]+]], 0
 ; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
 ; CHECK-CODE: br %r14
 
Index: test/CodeGen/SystemZ/vec-sub-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-sub-01.ll
+++ test/CodeGen/SystemZ/vec-sub-01.ll
@@ -38,10 +38,9 @@
   ret <2 x i64> %ret
 }
 
-; Test a v4f32 subtraction, as an example of an operation that needs to be
-; scalarized and reassembled.  At present there's an unnecessary move that
-; could be avoided with smarter ordering.  It also isn't important whether
-; the VSLDBs use the result of the VLRs or use %v24 and %v26 directly.
+; Test a v4f32 subtraction, as an example of an operation that needs
+; to be scalarized and reassembled.  It isn't important whether the
+; VSLDBs use the result of the VLRs or use %v24 and %v26 directly.
 define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
 ; CHECK-LABEL: f5:
 ; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
@@ -52,12 +51,11 @@
 ; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
 ; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
 ; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
-; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]]
-; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
+; CHECK-DAG: sebr %f[[A1]], %f[[A2]]
 ; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
 ; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
 ; CHECK-DAG: sebr %f[[D1]], %f[[D2]]
-; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]]
+; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1]], %v[[B1]]
 ; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]]
 ; CHECK: vmrhg %v24, [[HIGH]], [[LOW]]
 ; CHECK: br %r14