// Reviewer notes for this patch section (these lines are outside the hunk text).
// NOTE(review): the whole section is stored on one physical line; it looks like
//   the unified-diff line breaks were lost, and they would need to be restored
//   before the patch can be applied -- confirm against the original review.
// - Single hunk against AMDGPUAsmPrinter.cpp: @@ -1029,18 +1029,26 @@.
// - Adds `const Function &F = MF.getFunction();` and uses F where the removed
//   code called MF.getFunction() directly.
// - The loop over the function arguments, which adds each argument's register
//   count to WaveDispatchNumSGPR when the argument has Attribute::InReg and to
//   WaveDispatchNumVGPR otherwise, moves inside
//   `if (isShader(F.getCallingConv()))`. The two std::max updates of
//   ProgInfo.NumSGPR and ProgInfo.NumVGPR move inside the same `if`, so when
//   the check is false the argument sizes no longer raise either count.
// - The per-argument bit size now comes from DL.getTypeSizeInBits(Arg.getType())
//   (DL is the DataLayout of F's parent module) instead of
//   Arg.getType()->getPrimitiveSizeInBits(). The round-up to whole 32-bit
//   units, (bits + 31) / 32, is unchanged.
// - The added FIXME records that the count should instead be the number of
//   registers determined during calling convention lowering.
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1029,18 +1029,26 @@ // Account for extra SGPRs and VGPRs reserved for debugger use. ProgInfo.NumSGPR += ExtraSGPRs; + const Function &F = MF.getFunction(); + // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave // dispatch registers are function args. unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0; - for (auto &Arg : MF.getFunction().args()) { - unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32; - if (Arg.hasAttribute(Attribute::InReg)) - WaveDispatchNumSGPR += NumRegs; - else - WaveDispatchNumVGPR += NumRegs; + + if (isShader(F.getCallingConv())) { + // FIXME: We should be using the number of registers determined during + // calling convention lowering to legalize the types. + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &Arg : F.args()) { + unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32; + if (Arg.hasAttribute(Attribute::InReg)) + WaveDispatchNumSGPR += NumRegs; + else + WaveDispatchNumVGPR += NumRegs; + } + ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR); + ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR); } - ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR); - ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR); // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. 
// Reviewer notes for this patch section (these lines are outside the hunk text).
// NOTE(review): the whole section is stored on one physical line; it looks like
//   the unified-diff line breaks were lost, and they would need to be restored
//   before the patch can be applied -- confirm against the original review.
// - Two hunks against the test file code-object-v3.ll.
// - Hunk 1 (@@ -50,8 +50,9 @@) updates the OSABI-AMDHSA-ELF checks for the
//   '.rela.rodata' relocation section. The second column changes for fadd
//   (0000000100000005 -> 0000000300000005) and for fsub
//   (0000000300000005 -> 0000000500000005). A third R_AMDGPU_REL64 check is
//   added at offset 0000000000000090 for `empty + 10`, with second column
//   0000000100000005 and symbol value 0000000000000200.
// - Hunk 2 (@@ -85,3 +86,18 @@) appends the kernel `@empty`, declared
//   `amdgpu_kernel void`, which takes one i32 and three `float addrspace(1)*`
//   arguments; its body is only `ret void`.
// - The new ALL-ASM checks after the `empty:` label require
//   `.amdhsa_next_free_vgpr 1` and `.amdhsa_next_free_sgpr 1`. The test's own
//   comment gives the intent: kernel arguments must not count towards the
//   number of registers used.
Index: llvm/test/CodeGen/AMDGPU/code-object-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/code-object-v3.ll +++ llvm/test/CodeGen/AMDGPU/code-object-v3.ll @@ -50,8 +50,9 @@ ; OSABI-AMDHSA-ELF: .rodata PROGBITS {{[0-9]+}} {{[0-9]+}} {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 ; OSABI-AMDHSA-ELF: Relocation section '.rela.rodata' at offset -; OSABI-AMDHSA-ELF: 0000000000000010 0000000100000005 R_AMDGPU_REL64 0000000000000000 fadd + 10 -; OSABI-AMDHSA-ELF: 0000000000000050 0000000300000005 R_AMDGPU_REL64 0000000000000100 fsub + 10 +; OSABI-AMDHSA-ELF: 0000000000000010 0000000300000005 R_AMDGPU_REL64 0000000000000000 fadd + 10 +; OSABI-AMDHSA-ELF: 0000000000000050 0000000500000005 R_AMDGPU_REL64 0000000000000100 fsub + 10 +; OSABI-AMDHSA-ELF: 0000000000000090 0000000100000005 R_AMDGPU_REL64 0000000000000200 empty + 10 ; OSABI-AMDHSA-ELF: Symbol table '.symtab' contains {{[0-9]+}} entries ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 {{[0-9]+}} FUNC GLOBAL PROTECTED {{[0-9]+}} fadd @@ -85,3 +86,18 @@ store float %r.val, float addrspace(1)* %r ret void } + +; Make sure kernel arguments do not count towards the number of +; registers used. +; +; ALL-ASM-LABEL: {{^}}empty: +; ALL-ASM: .amdhsa_next_free_vgpr 1 +; ALL-ASM: .amdhsa_next_free_sgpr 1 +define amdgpu_kernel void @empty( + i32 %i, + float addrspace(1)* %r, + float addrspace(1)* %a, + float addrspace(1)* %b) { +entry: + ret void +}