Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -837,6 +837,23 @@
   ProgInfo.NumSGPR += ExtraSGPRs;
   ProgInfo.NumVGPR += ExtraVGPRs;
 
+  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
+  // dispatch registers are function args.
+  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
+  for (auto &Arg : MF.getFunction().args()) {
+    // Size the argument through the DataLayout rather than
+    // Type::getPrimitiveSizeInBits(), which returns 0 for pointer (and other
+    // non-primitive) types and would leave such arguments uncounted.
+    unsigned NumRegs =
+        (MF.getDataLayout().getTypeSizeInBits(Arg.getType()) + 31) / 32;
+    if (Arg.hasAttribute(Attribute::InReg))
+      WaveDispatchNumSGPR += NumRegs;
+    else
+      WaveDispatchNumVGPR += NumRegs;
+  }
+  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+
   // Adjust number of registers used to meet default/requested minimum/maximum
   // number of waves per execution unit request.
   ProgInfo.NumSGPRsForWavesPerEU = std::max(
Index: llvm/trunk/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
+
+; This compute shader has input args that claim that it has 17 sgprs and 5 vgprs
+; in wave dispatch. Ensure that the sgpr and vgpr counts in COMPUTE_PGM_RSRC1
+; are set to reflect that, even though the registers are not used in the shader.
+
+; GCN-LABEL: {{^}}_amdgpu_cs_main:
+; SI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+; VI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}c1,
+; GFX9: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg, i32 inreg, <2 x i32> inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <5 x i32>) {
+.entry:
+  ret void
+}
+