Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUCallLowering.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
+#include "GCNSchedStrategy.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
 #include "R600MachineScheduler.h"
@@ -95,6 +96,13 @@
   return new SIScheduleDAGMI(C);
 }
 
+static ScheduleDAGInstrs *createGCNMachineScheduler(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG =
+    new ScheduleDAGMILive(C, make_unique<GCNSchedStrategy>(C));
+  DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+  return DAG;
+}
+
 static MachineSchedRegistry
 R600SchedRegistry("r600", "Run R600's custom scheduler",
                   createR600MachineScheduler);
@@ -103,6 +111,10 @@
 SISchedRegistry("si", "Run SI's custom scheduler",
                 createSIMachineScheduler);
 
+static MachineSchedRegistry
+GCNSchedRegistry("gcn", "Run GCN custom scheduler",
+                 createGCNMachineScheduler);
+
 static StringRef computeDataLayout(const Triple &TT) {
   if (TT.getArch() == Triple::r600) {
     // 32-bit pointers.
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -49,6 +49,7 @@
   AMDGPUPromoteAlloca.cpp
   AMDGPURegisterInfo.cpp
   GCNHazardRecognizer.cpp
+  GCNSchedStrategy.cpp
   R600ClauseMergePass.cpp
   R600ControlFlowFinalizer.cpp
   R600EmitClauseMarkers.cpp
Index: lib/Target/AMDGPU/GCNSchedStrategy.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -0,0 +1,51 @@
+//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+
+class GCNSchedStrategy : public GenericScheduler {
+
+  SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
+                         const RegPressureTracker &RPTracker,
+                         SchedCandidate &Cand);
+
+  void initCandidate(SchedCandidate &Cand, SUnit *SU,
+                     bool AtTop, const RegPressureTracker &RPTracker,
+                     RegPressureTracker &TempTracker,
+                     const SIRegisterInfo *SRI,
+                     int SGPRPressure, int VGPRPressure,
+                     int SGPRExcessLimit, int VGPRExcessLimit,
+                     int SGPRCriticalLimit, int VGPRCriticalLimit);
+
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone, const SIRegisterInfo *SRI,
+                    unsigned SGPRPressure, unsigned VGPRPressure);
+
+public:
+  GCNSchedStrategy(const MachineSchedContext *C);
+
+  SUnit *pickNode(bool &IsTopNode) override;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -0,0 +1,350 @@
+//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This is a minimal scheduler strategy. The main difference between this
+/// and the GenericScheduler is that GCNSchedStrategy uses different
+/// heuristics to determine excess/critical pressure sets.
+//===----------------------------------------------------------------------===//
+
+#include "GCNSchedStrategy.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+using namespace llvm;
+
+GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C) { }
+
+static unsigned MaxWavesPerSGPRs(unsigned SGPRs,
+                                 const SISubtarget &ST) {
+
+  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+    if (SGPRs <= 80)
+      return 10;
+    if (SGPRs <= 88)
+      return 9;
+    if (SGPRs <= 100)
+      return 8;
+    return 7;
+  }
+  if (SGPRs <= 48)
+    return 10;
+  if (SGPRs <= 56)
+    return 9;
+  if (SGPRs <= 64)
+    return 8;
+  if (SGPRs <= 72)
+    return 7;
+  if (SGPRs <= 80)
+    return 6;
+  return 5;
+}
+
+static unsigned MaxWavesPerVGPRs(unsigned VGPRs) {
+  if (VGPRs <= 24)
+    return 10;
+  if (VGPRs <= 28)
+    return 9;
+  if (VGPRs <= 32)
+    return 8;
+  if (VGPRs <= 36)
+    return 7;
+  if (VGPRs <= 40)
+    return 6;
+  if (VGPRs <= 48)
+    return 5;
+  if (VGPRs <= 64)
+    return 4;
+  if (VGPRs <= 84)
+    return 3;
+  if (VGPRs <= 128)
+    return 2;
+  return 1;
+}
+
+static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
+                            const SISubtarget &ST) {
+  return std::min(MaxWavesPerSGPRs(SGPRs, ST), MaxWavesPerVGPRs(VGPRs));
+}
+
+void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
+                                     bool AtTop, const RegPressureTracker &RPTracker,
+                                     RegPressureTracker &TempTracker,
+                                     const SIRegisterInfo *SRI,
+                                     int SGPRPressure,
+                                     int VGPRPressure,
+                                     int SGPRExcessLimit,
+                                     int VGPRExcessLimit,
+                                     int SGPRCriticalLimit,
+                                     int VGPRCriticalLimit) {
+
+  const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
+  Cand.SU = SU;
+  Cand.AtTop = AtTop;
+
+  // If two instructions increase the pressure of different register sets
+  // by the same amount, the generic scheduler will prefer to schedule the
+  // instruction that increases the set with the least amount of registers,
+  // which in our case would be SGPRs. This is rarely what we want, so
+  // when we report excess/critical register pressure, we do it either
+  // only for VGPRs or only for SGPRs.
+
+  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
+  const int MaxVGPRPressureInc = 16;
+  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
+  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
+
+  std::vector<unsigned> Pressure;
+  std::vector<unsigned> MaxPressure;
+
+  if (AtTop)
+    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+  else
+    // FIXME: I think for bottom up scheduling, the register pressure is cached
+    // and can be retrieved by DAG->getPressureDiff(SU).
+    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+
+  int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+  int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+
+
+  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
+  // to increase the likelihood we don't go over the limits. We should improve
+  // the analysis to look through dependencies to find the path with the least
+  // register pressure.
+  // FIXME: This is also necessary, because some passes that run after
+  // scheduling and before regalloc increase register pressure.
+  const int ErrorMargin = 3;
+  VGPRExcessLimit -= ErrorMargin;
+  SGPRExcessLimit -= ErrorMargin;
+
+  // We only need to update the RPDelta for instructions that increase
+  // register pressure. Instructions that decrease or keep reg pressure
+  // the same will be marked as RegExcess in tryCandidate() when they
+  // are compared with instructions that increase the register pressure.
+  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
+    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
+    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
+  }
+
+  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
+    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
+    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
+  }
+
+  // Register pressure is considered 'CRITICAL' if it is approaching a value
+  // that would reduce the wave occupancy for the execution unit. When
+  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
+  // has the same cost, so we don't need to prefer one over the other.
+
+  unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, ST);
+  VGPRCriticalLimit = SRI->getNumVGPRsAllowed(MaxWaves);
+  SGPRCriticalLimit = SRI->getNumSGPRsAllowed(DAG->MF.getSubtarget<SISubtarget>(),
+                                              MaxWaves);
+
+  VGPRCriticalLimit -= ErrorMargin;
+  SGPRCriticalLimit -= ErrorMargin;
+
+  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
+  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
+
+  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+    if (SGPRDelta > VGPRDelta) {
+      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
+      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
+    } else {
+      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
+      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
+    }
+  }
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeFromQueue()
+void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
+                                         const CandPolicy &ZonePolicy,
+                                         const RegPressureTracker &RPTracker,
+                                         SchedCandidate &Cand) {
+  // initCandidate() will make changes to the RPTracker, so we need to pass it
+  // a non-const tracker. However, initCandidate() will undo any changes it
+  // makes, so TempTracker will be the same before and after the call to
+  // initCandidate().
+  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+  RegisterPressure Pressure = TempTracker.getPressure();
+  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+  std::pair<unsigned, unsigned> CurPressure =
+      SRI->getMaxRegPressure(TempTracker.getRegSetPressureAtPos());
+
+  std::pair<unsigned, unsigned> ExcessLimits = SRI->getRegPressureSetLimits(DAG->MF);
+
+  ReadyQueue &Q = Zone.Available;
+  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+
+    SchedCandidate TryCand(ZonePolicy);
+    initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker, SRI,
+                  CurPressure.first, CurPressure.second, ExcessLimits.first,
+                  ExcessLimits.second, 0, 0);
+    // Pass SchedBoundary only when comparing nodes from the same boundary.
+    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+    //tryCandidate(Cand, TryCand, ZoneArg, SRI, MaxPressure.first, MaxPressure.second);
+    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
+    if (TryCand.Reason != NoCand) {
+      // Initialize resource delta if needed in case future heuristics query it.
+      if (TryCand.ResDelta == SchedResourceDelta())
+        TryCand.initResourceDelta(Zone.DAG, SchedModel);
+      Cand.setBest(TryCand);
+    }
+  }
+}
+
+static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
+  switch (Reason) {
+  default:
+    return Reason;
+  case GenericSchedulerBase::RegCritical:
+  case GenericSchedulerBase::RegExcess:
+    return -1 * Reason;
+  }
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeBidirectional()
+SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+  // Schedule as far as possible in the direction of no choice. This is most
+  // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + CandPolicy BotPolicy; + setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + CandPolicy TopPolicy; + setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); + + // See if BotCand is still valid (because we previously scheduled from Top). + DEBUG(dbgs() << "Picking from Bot:\n"); + if (!BotCand.isValid() || BotCand.SU->isScheduled || + BotCand.Policy != BotPolicy) { + BotCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(BotCand)); + } + + // Check if the top Q has a better candidate. + DEBUG(dbgs() << "Picking from Top:\n"); + if (!TopCand.isValid() || TopCand.SU->isScheduled || + TopCand.Policy != TopPolicy) { + TopCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(TopCand)); + } + + // Pick best from BotCand and TopCand. 
+  DEBUG(dbgs() << "Top Cand: ");
+  DEBUG(traceCandidate(TopCand));
+  DEBUG(dbgs() << "Bot Cand: ");
+  DEBUG(traceCandidate(BotCand));
+  SchedCandidate Cand;
+  if (TopCand.Reason == BotCand.Reason) {
+    Cand = BotCand;
+    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
+    TopCand.Reason = NoCand;
+    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
+    if (TopCand.Reason != NoCand) {
+      Cand.setBest(TopCand);
+    } else {
+      TopCand.Reason = TopReason;
+    }
+  } else {
+    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
+      Cand = TopCand;
+    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
+      Cand = BotCand;
+    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+      Cand = TopCand;
+    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+      Cand = BotCand;
+    } else {
+      int TopRank = getBidirectionalReasonRank(TopCand.Reason);
+      int BotRank = getBidirectionalReasonRank(BotCand.Reason);
+      if (TopRank > BotRank) {
+        Cand = TopCand;
+      } else {
+        Cand = BotCand;
+      }
+    }
+  }
+  DEBUG(dbgs() << "Picking: ");
+  DEBUG(traceCandidate(Cand));
+
+  IsTopNode = Cand.AtTop;
+  return Cand.SU;
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNode()
+SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+    return nullptr;
+  }
+  SUnit *SU;
+  do {
+    if (RegionPolicy.OnlyTopDown) {
+      SU = Top.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        TopCand.reset(NoPolicy);
+        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+        assert(TopCand.Reason != NoCand && "failed to find a candidate");
+        SU = TopCand.SU;
+      }
+      IsTopNode = true;
+    } else if (RegionPolicy.OnlyBottomUp) {
+      SU = Bot.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        BotCand.reset(NoPolicy);
+        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+        assert(BotCand.Reason != NoCand && "failed to find a candidate");
+        SU = BotCand.SU;
+      }
+      IsTopNode = false;
+    } else {
+      SU = pickNodeBidirectional(IsTopNode);
+    }
+  } while (SU->isScheduled);
+
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
+
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+  return SU;
+}
Index: lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.h
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -48,10 +48,6 @@
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  unsigned getRegPressureSetLimit(const MachineFunction &MF,
-                                  unsigned Idx) const override;
-
-
   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -230,27 +230,6 @@
   return Reserved;
 }
 
-unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
-                                                unsigned Idx) const {
-  const SISubtarget &STI = MF.getSubtarget<SISubtarget>();
-  // FIXME: We should adjust the max number of waves based on LDS size.
-  unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU());
-  unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
-
-  unsigned VSLimit = SGPRLimit + VGPRLimit;
-
-  if (SGPRPressureSets.test(Idx) && VGPRPressureSets.test(Idx)) {
-    // FIXME: This is a hack. We should never be considering the pressure of
-    // these since no virtual register should ever have this class.
-    return VSLimit;
-  }
-
-  if (SGPRPressureSets.test(Idx))
-    return SGPRLimit;
-
-  return VGPRLimit;
-}
-
 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
   return Fn.getFrameInfo().hasStackObjects();
 }
Index: lib/Target/AMDGPU/SISchedule.td
===================================================================
--- lib/Target/AMDGPU/SISchedule.td
+++ lib/Target/AMDGPU/SISchedule.td
@@ -47,6 +47,10 @@
 
 class SISchedMachineModel : SchedMachineModel {
   let CompleteModel = 0;
+  // MicroOpBufferSize = 1 means that instructions will always be added to
+  // the ready queue when they become available. This exposes them
+  // to the register pressure analysis.
+  let MicroOpBufferSize = 1;
   let IssueWidth = 1;
   let PostRAScheduler = 1;
 }