diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -456,7 +456,7 @@ Info = analyzeResourceUsage(MF); } - if (STM.isAmdPalOS() && MFI->isEntryFunction()) + if (STM.isAmdPalOS()) EmitPALMetadata(MF, CurrentProgramInfo); else if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, CurrentProgramInfo); @@ -1241,6 +1241,11 @@ auto CC = MF.getFunction().getCallingConv(); auto MD = getTargetStreamer()->getPALMetadata(); + if (!AMDGPU::isEntryFunctionCC(CC)) { + MD->setStackFrameSize(MF, CurrentProgramInfo.ScratchSize); + return; + } + MD->setEntryPoint(CC, MF.getFunction().getName()); MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU); MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H #include "llvm/BinaryFormat/MsgPackDocument.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -26,6 +27,7 @@ msgpack::Document MsgPackDoc; msgpack::DocNode Registers; msgpack::DocNode HwStages; + msgpack::DocNode ShaderFunctions; public: // Read the amdgpu.pal.metadata supplied by the frontend, ready for @@ -76,6 +78,9 @@ // Set the scratch size in the metadata. void setScratchSize(unsigned CC, unsigned Val); + // Set the stack frame size of a function in the metadata. + void setStackFrameSize(const MachineFunction &MF, unsigned Val); + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void setWave32(unsigned CC); @@ -119,6 +124,12 @@ // Get (create if necessary) the registers map. 
msgpack::MapDocNode getRegisters(); + // Reference (create if necessary) the node for the shader functions map. + msgpack::DocNode &refShaderFunctions(); + + // Get (create if necessary) the shader functions map. + msgpack::MapDocNode getShaderFunctions(); + // Get (create if necessary) the .hardware_stages entry for the given calling // convention. msgpack::MapDocNode getHwStage(unsigned CC); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -238,6 +238,14 @@ getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val); } +// Set the stack frame size of a function in the metadata. +void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF, + unsigned Val) { + auto Node = MsgPackDoc.getMapNode(); + Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val); + getShaderFunctions()[MF.getFunction().getName()] = Node; +} + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void AMDGPUPALMetadata::setWave32(unsigned CC) { @@ -721,6 +729,24 @@ return Registers.getMap(); } +// Reference (create if necessary) the node for the shader functions map. +msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() { + auto &N = + MsgPackDoc.getRoot() + .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")] + .getArray(/*Convert=*/true)[0] + .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")]; + N.getMap(/*Convert=*/true); + return N; +} + +// Get (create if necessary) the shader functions map. +msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() { + if (ShaderFunctions.isEmpty()) + ShaderFunctions = refShaderFunctions(); + return ShaderFunctions.getMap(); +} + // Return the PAL metadata hardware shader stage name. 
static const char *getStageName(CallingConv::ID CC) { switch (CC) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -3,7 +3,11 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; GCN-LABEL: {{^}}gfx_callable_amdpal: -; GCN: .amd_amdgpu_pal_metadata {{$}} +; GCN: amdpal.pipelines: +; GCN: - .registers: {} +; GCN: .shader_functions: +; GCN: gfx_callable_amdpal: +; GCN: .stack_frame_size_in_bytes: 0 define amdgpu_gfx half @gfx_callable_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add