diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -46,7 +46,7 @@
 
   Triple TargetTriple;
 
-  const TargetSubtargetInfo *ST;
+  const GCNSubtarget *ST;
   const TargetLoweringBase *TLI;
 
   const TargetSubtargetInfo *getST() const { return ST; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -96,10 +96,23 @@
 
   // TODO: Do we want runtime unrolling?
 
+  // Set more aggressive unrolling defaults for AMDPAL shaders
+  if (TargetTriple.getOS() == Triple::AMDPAL) {
+    UP.MaxPercentThresholdBoost = 1000;
+    // and even more aggressive for GFX10 and later generations
+    if (ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
+      UP.Threshold = 1100;
+      UP.PartialThreshold = 1100;
+    } else {
+      UP.Threshold = 700;
+      UP.PartialThreshold = 700;
+    }
+  }
+
   // Maximum alloca size than can fit registers. Reserve 16 registers.
   const unsigned MaxAlloca = (256 - 16) * 4;
   unsigned ThresholdPrivate = UnrollThresholdPrivate;
-  unsigned ThresholdLocal = UnrollThresholdLocal;
+  unsigned ThresholdLocal = std::max((unsigned)UnrollThresholdLocal, UP.Threshold);
   unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
   for (const BasicBlock *BB : L->getBlocks()) {
     const DataLayout &DL = BB->getModule()->getDataLayout();