Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -652,7 +652,10 @@
     return 16;
 
   // Assume all implicit inputs are used by default
-  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
+  // The default size is 256 bytes for code object version 5 and later, and
+  // 56 bytes for earlier versions.
+  unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
+  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
 }
 
 uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
Index: llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-size.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-size.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=CHECK,COV3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK,COV5 %s
+
+; This test expects the .amdhsa_kernarg_size directive to cover the single
+; 8-byte explicit argument plus the default implicit-argument bytes (56 for
+; code object v3, 256 for code object v5) when the kernel calls
+; llvm.amdgcn.implicitarg.ptr, and only the explicit 8 bytes when it does not.
+
+; CHECK-LABEL: kernel_with_implicitarg_ptr:
+; COV3: .amdhsa_kernarg_size 64
+; COV5: .amdhsa_kernarg_size 264
+define amdgpu_kernel void @kernel_with_implicitarg_ptr(i8 addrspace(4)* addrspace(1)* %ptr) {
+  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* %ptr
+  ret void
+}
+
+; CHECK-LABEL: kernel_without_implicitarg_ptr:
+; CHECK: .amdhsa_kernarg_size 8
+define amdgpu_kernel void @kernel_without_implicitarg_ptr(i8 addrspace(4)* addrspace(1)* %ptr) {
+  store volatile i8 addrspace(4)* undef, i8 addrspace(4)* addrspace(1)* %ptr
+  ret void
+}
+
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()