diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -49,6 +49,8 @@
   const TargetSubtargetInfo *ST;
   const TargetLoweringBase *TLI;
 
+  AMDGPUSubtarget::Generation Gen;
+
   const TargetSubtargetInfo *getST() const { return ST; }
   const TargetLoweringBase *getTLI() const { return TLI; }
 
@@ -57,7 +59,8 @@
       : BaseT(TM, F.getParent()->getDataLayout()),
         TargetTriple(TM->getTargetTriple()),
         ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
-        TLI(ST->getTargetLowering()) {}
+        TLI(ST->getTargetLowering()),
+        Gen(TM->getSubtarget<GCNSubtarget>(F).getGeneration()) {}
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -96,6 +96,19 @@
 
   // TODO: Do we want runtime unrolling?
 
+  // Set more aggressive loop-unrolling defaults for AMDPAL shaders.
+  if (TargetTriple.getOS() == Triple::AMDPAL) {
+    UP.MaxPercentThresholdBoost = 1000;
+    // Use even higher thresholds for GFX10 and above (Gen >= GFX10).
+    if (Gen >= AMDGPUSubtarget::GFX10) {
+      UP.Threshold = 1100;
+      UP.PartialThreshold = 1100;
+    } else {
+      UP.Threshold = 700;
+      UP.PartialThreshold = 700;
+    }
+  }
+
   // Maximum alloca size than can fit registers. Reserve 16 registers.
   const unsigned MaxAlloca = (256 - 16) * 4;
   unsigned ThresholdPrivate = UnrollThresholdPrivate;