diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -124,6 +124,19 @@
   }
 };
 
+/// Return the VGPR budget (MaxVGPRs) used by the promote-alloca code in this
+/// file. Targets whose triple is not AMDGCN get a fixed budget of 128.
+/// Otherwise the budget is the subtarget's maximum VGPR count for the first
+/// element of the waves-per-EU pair computed for \p F. The same budget applies
+/// to entry and non-entry functions; no calling-convention clamp is done here.
+unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
+  if (!TM.getTargetTriple().isAMDGCN())
+    return 128;
+
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+}
+
 } // end anonymous namespace
 
 char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +189,7 @@
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  if (IsAMDGCN) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  MaxVGPRs = getMaxVGPRs(TM, F);
 
   bool SufficientLDS = hasSufficientLocalMem(F);
   bool Changed = false;
@@ -1200,17 +1204,7 @@
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  unsigned MaxVGPRs;
-  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  const unsigned MaxVGPRs = getMaxVGPRs(TM, F);
 
   bool Changed = false;
   BasicBlock &EntryBB = *F.begin();
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
@@ -139,8 +139,8 @@
 }
 
 ; OPT-LABEL: @func_alloca_9xi64_max256(
-; OPT: alloca
-; OPT-NOT: <9 x i64>
+; OPT-NOT: alloca
+; OPT: <9 x i64>
 ; LIMIT32: alloca
 ; LIMIT32-NOT: <9 x i64>
 define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {