diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -493,7 +493,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -581,7 +581,7 @@
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = MFI->getMinAllowedOccupancy();
+  auto TgtOcc = MFI->getMinAllowedOccupancy(ST);
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -358,9 +358,9 @@
   // Allow memory bound functions to drop to 4 waves if not limited by an
   // attribute.
   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
-      WavesAfter >= MFI.getMinAllowedOccupancy()) {
+      WavesAfter >= MFI.getMinAllowedOccupancy(ST)) {
     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
-                      << MFI.getMinAllowedOccupancy() << " waves\n");
+                      << MFI.getMinAllowedOccupancy(ST) << " waves\n");
     NewOccupancy = WavesAfter;
   }
   if (NewOccupancy < MinOccupancy) {
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -251,7 +251,7 @@
   RPT.advanceToNext();
   GCNRegPressure MaxPressure = RPT.moveMaxPressure();
   unsigned Occupancy = MaxPressure.getOccupancy(*ST);
-  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
+  if (Occupancy >= MFI->getMinAllowedOccupancy(*ST) &&
       MaxPressure.getVGPRNum() <= MaxVGPRs &&
       MaxPressure.getSGPRNum() <= MaxSGPRs) {
     LastRecordedOccupancy = Occupancy;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -36,6 +36,7 @@
 
 namespace llvm {
 
+class GCNSubtarget;
 class MachineFrameInfo;
 class MachineFunction;
 class TargetRegisterClass;
@@ -915,11 +916,7 @@
     return Occupancy;
   }
 
-  unsigned getMinAllowedOccupancy() const {
-    if (!isMemoryBound() && !needsWaveLimiter())
-      return Occupancy;
-    return (Occupancy < 4) ? Occupancy : 4;
-  }
+  unsigned getMinAllowedOccupancy(const GCNSubtarget &ST) const;
 
   void limitOccupancy(const MachineFunction &MF);
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -190,6 +190,17 @@
     S.consumeInteger(0, GDSSize);
 }
 
+unsigned
+SIMachineFunctionInfo::getMinAllowedOccupancy(const GCNSubtarget &ST) const {
+  // Without memory-bound or wave-limiter status, Occupancy is the minimum.
+  if (!(isMemoryBound() || needsWaveLimiter()))
+    return Occupancy;
+  // The floor is 16 threads per SIMD lane: 4 waves per SIMD on GFX9 and below,
+  // 8 waves on GFX10 wave64 and 16 waves on GFX10 wave32.
+  const unsigned WaveFloor = ST.getTotalNumVGPRs() / 64;
+  return (Occupancy < WaveFloor) ? Occupancy : WaveFloor;
+}
+
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
   limitOccupancy(getMaxWavesPerEU());
   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();