Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -283,6 +283,13 @@ bool enableSubRegLiveness() const override { return true; } + + /// \brief Returns the offset in bytes from the start of the input buffer + /// of the first explicit kernel argument. + unsigned getExplicitKernelArgOffset() const { + return isAmdHsaOS() ? 0 : 36; + } + }; } // End namespace llvm Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -583,7 +583,8 @@ if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = Splits[i].VT; - const unsigned Offset = 36 + VA.getLocMemOffset(); + const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), Index: test/CodeGen/AMDGPU/hsa.ll =================================================================== --- test/CodeGen/AMDGPU/hsa.ll +++ test/CodeGen/AMDGPU/hsa.ll @@ -20,6 +20,7 @@ ; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" ; Test that the amd_kernel_code_t object is emitted ; HSA: .asciz +; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0 ; Make sure we are setting the ATC bit: ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0