diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -11251,10 +11251,8 @@ ".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value 2 integers is implementation defined, and can not be relied on between different builds of the compiler. - ".scratch_memory_size" sequence of Size in bytes of scratch memory used by the shader. - 2 integers - ".lds_size" sequence of Size in bytes of LDS memory. - 2 integers + ".scratch_memory_size" integer Size in bytes of scratch memory used by the shader. + ".lds_size" integer Size in bytes of LDS memory. ".vgpr_count" integer Number of VGPRs used by the shader. ".sgpr_count" integer Number of SGPRs used by the shader. ".stack_frame_size_in_bytes" integer Amount of stack size used by the shader. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1395,10 +1395,16 @@ auto *MD = getTargetStreamer()->getPALMetadata(); const MachineFrameInfo &MFI = MF.getFrameInfo(); MD->setFunctionScratchSize(MF, MFI.getStackSize()); + // Set compute registers MD->setRsrc1(CallingConv::AMDGPU_CS, CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS)); MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2); + + // Set optional info + MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize); + MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU); + MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU); } // This is supposed to be log2(Size) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -80,6 +80,21 @@ // Set the stack frame size of a function in the 
metadata. void setFunctionScratchSize(const MachineFunction &MF, unsigned Val); + // Set the amount of LDS used in bytes in the metadata. This is an optional + // advisory record for logging etc; wave dispatch actually uses the rsrc1 + // register for the shader stage to determine the amount of LDS to allocate. + void setFunctionLdsSize(const MachineFunction &MF, unsigned Val); + + // Set the number of used vgprs in the metadata. This is an optional advisory + // record for logging etc; wave dispatch actually uses the rsrc1 register for + // the shader stage to determine the number of vgprs to allocate. + void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val); + + // Set the number of used sgprs in the metadata. This is an optional advisory + // record for logging etc; wave dispatch actually uses the rsrc1 register for + // the shader stage to determine the number of sgprs to allocate. + void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val); + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void setWave32(unsigned CC); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -243,6 +243,27 @@ Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val); } +// Set the amount of LDS used in bytes in the metadata. +void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF, + unsigned Val) { + auto Node = getShaderFunction(MF.getFunction().getName()); + Node[".lds_size"] = MsgPackDoc.getNode(Val); +} + +// Set the number of used vgprs in the metadata. 
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF, + unsigned Val) { + auto Node = getShaderFunction(MF.getFunction().getName()); + Node[".vgpr_count"] = MsgPackDoc.getNode(Val); +} + +// Set the number of used sgprs in the metadata. +void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF, + unsigned Val) { + auto Node = getShaderFunction(MF.getFunction().getName()); + Node[".sgpr_count"] = MsgPackDoc.getNode(Val); +} + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void AMDGPUPALMetadata::setWave32(unsigned CC) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s -; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s +; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s ; Make sure this interacts well with -amdgpu-fixed-function-abi -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < 
%s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s declare float @extern_func(float) #0 declare float @extern_func_many_args(<64 x float>) #0 @@ -147,40 +147,92 @@ ; GCN: amdpal.pipelines: ; GCN-NEXT: - .registers: -; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}} -; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} -; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}} -; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} +; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}} +; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; SDAG-NEXT: .vgpr_count: 0x2{{$}} +; GISEL-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: dynamic_stack_loop: +; GCN-NEXT: .lds_size: 0{{$}} +; SDAG-NEXT: .sgpr_count: 0x22{{$}} +; GISEL-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; SDAG-NEXT: .vgpr_count: 0x3{{$}} +; GISEL-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: multiple_stack: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} +; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} +; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_extern_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: no_stack_extern_call_many_args: -; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} -; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} +; GCN-NEXT: .lds_size: 
0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} +; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: no_stack_indirect_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: simple_lds: +; GCN-NEXT: .lds_size: 0x100{{$}} +; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} +; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: simple_lds_recurse: +; GCN-NEXT: .lds_size: 0x100{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: simple_stack: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}} +; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GCN-NEXT: .sgpr_count: 0x22{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: simple_stack_extern_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: simple_stack_indirect_call: +; GCN-NEXT: .lds_size: 0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: simple_stack_recurse: +; GCN-NEXT: .lds_size: 0{{$}} +; GFX8-NEXT: .sgpr_count: 0x68{{$}} +; GFX9-NEXT: .sgpr_count: 0x66{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} +; GCN-NEXT: .vgpr_count: 0x40{{$}} ; GCN-NEXT: ...