// Patch summary (text placed before the first "diff --git" header is preamble
// and is not part of any hunk):
//
// AMDGPUTargetTransformInfo.h
//   * Hunk @@ -49: adds a data member `AMDGPUSubtarget::Generation Gen` after
//     the existing `ST` and `TLI` members.
//   * Hunk @@ -57: extends the constructor's initializer list so that `Gen` is
//     initialized from `TM->getSubtarget<GCNSubtarget>(F).getGeneration()`,
//     i.e. the generation is captured once per function `F` at construction.
//
// AMDGPUTargetTransformInfo.cpp
//   * Hunk @@ -96: when `TargetTriple.getOS() == Triple::AMDPAL`, sets
//     `UP.MaxPercentThresholdBoost` to 1000 and sets both `UP.Threshold` and
//     `UP.PartialThreshold` to 1100 if `Gen >= AMDGPUSubtarget::GFX10`,
//     otherwise to 700. Non-AMDPAL targets are not touched by this hunk.
//
// NOTE(review): the enclosing function of the .cpp hunk is outside this view;
//   presumably it is getUnrollingPreferences (the `UP` name matches the
//   parameter declared in the header) -- confirm.
// NOTE(review): the 1000 / 1100 / 700 values are bare literals and are
//   assigned unconditionally inside the AMDPAL branch; whether later code in
//   the function or a command-line option adjusts them is not visible here --
//   confirm before relying on these as final values.
// NOTE(review): this patch text appears to be collapsed onto a single line
//   (the hunk headers announce multi-line hunks); line breaks and the original
//   indentation / blank context lines would need to be restored from the
//   upstream change before it can be applied -- verify against the source.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -49,6 +49,8 @@ const TargetSubtargetInfo *ST; const TargetLoweringBase *TLI; + AMDGPUSubtarget::Generation Gen; + const TargetSubtargetInfo *getST() const { return ST; } const TargetLoweringBase *getTLI() const { return TLI; } @@ -57,7 +59,8 @@ : BaseT(TM, F.getParent()->getDataLayout()), TargetTriple(TM->getTargetTriple()), ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))), - TLI(ST->getTargetLowering()) {} + TLI(ST->getTargetLowering()), + Gen(TM->getSubtarget<GCNSubtarget>(F).getGeneration()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -96,6 +96,19 @@ // TODO: Do we want runtime unrolling? + // Set more aggressive defaults for PAL shaders + if (TargetTriple.getOS() == Triple::AMDPAL) { + UP.MaxPercentThresholdBoost = 1000; + // and even more aggressive for GFX10 + if (Gen >= AMDGPUSubtarget::GFX10) { + UP.Threshold = 1100; + UP.PartialThreshold = 1100; + } else { + UP.Threshold = 700; + UP.PartialThreshold = 700; + } + } + // Maximum alloca size than can fit registers. Reserve 16 registers. const unsigned MaxAlloca = (256 - 16) * 4; unsigned ThresholdPrivate = UnrollThresholdPrivate;