Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -801,6 +801,8 @@ auto &DL = M->getDataLayout(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); + Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); + if (HiddenArgNumBytes >= 8) emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); @@ -973,6 +975,11 @@ msgpack::ArrayDocNode Args) { auto &Func = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); + + // No implicit kernel argument is used. + if (ST.getImplicitArgNumBytes(Func) == 0) + return; + const Module *M = Func.getParent(); auto &DL = M->getDataLayout(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); @@ -981,6 +988,7 @@ auto Int32Ty = Type::getInt32Ty(Func.getContext()); auto Int16Ty = Type::getInt16Ty(Func.getContext()); + Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll @@ -24,8 +24,8 @@ } ; CHECK: - .args: -; CHECK: .value_kind: hidden_multigrid_sync_arg -; PRE-GFX9: .offset: 200 +; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg +; PRE-GFX9-NEXT: .offset: 200 ; PRE-GFX9-NEXT: .size: 4 ; PRE-GFX9-NEXT: .value_kind: hidden_private_base ; PRE-GFX9-NEXT: .offset: 204 @@ -44,8 +44,8 @@ } ; CHECK: - .args: -; CHECK: .value_kind: hidden_multigrid_sync_arg -; PRE-GFX9: .offset: 200 +; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg +; PRE-GFX9-NEXT: .offset: 200 ; PRE-GFX9-NEXT: .size: 4 ; PRE-GFX9-NEXT: .value_kind: hidden_private_base ; PRE-GFX9-NEXT: .offset: 204 @@ -64,8 +64,8 @@ } ; CHECK: - .args: -; CHECK: .value_kind: hidden_multigrid_sync_arg -; PRE-GFX9: .offset: 192 +; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg +; PRE-GFX9-NEXT: .offset: 192 ; PRE-GFX9-NEXT: .size: 4 ; PRE-GFX9-NEXT: .value_kind: hidden_private_base ; PRE-GFX9-NEXT: .offset: 196 Index: llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll +++ llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll @@ -21,7 +21,7 @@ ; CHECK-LABEL: amdhsa.kernels: ; CHECK: - .args: ; CHECK-NEXT: - .name: four -; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 ; CHECK-NEXT: .value_kind: by_value ; CHECK-NEXT: - .offset: 8 @@ -56,4 +56,3 @@ ; CHECK-LABEL: .name: test_aligned_to_eight declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() -