Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -18,6 +18,7 @@
 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
+#include "SIProgramInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
@@ -52,60 +53,6 @@
     int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
   };
 
-  // Track resource usage for kernels / entry functions.
-  struct SIProgramInfo {
-    // Fields set in PGM_RSRC1 pm4 packet.
-    uint32_t VGPRBlocks = 0;
-    uint32_t SGPRBlocks = 0;
-    uint32_t Priority = 0;
-    uint32_t FloatMode = 0;
-    uint32_t Priv = 0;
-    uint32_t DX10Clamp = 0;
-    uint32_t DebugMode = 0;
-    uint32_t IEEEMode = 0;
-    uint64_t ScratchSize = 0;
-
-    uint64_t ComputePGMRSrc1 = 0;
-
-    // Fields set in PGM_RSRC2 pm4 packet.
-    uint32_t LDSBlocks = 0;
-    uint32_t ScratchBlocks = 0;
-
-    uint64_t ComputePGMRSrc2 = 0;
-
-    uint32_t NumVGPR = 0;
-    uint32_t NumSGPR = 0;
-    uint32_t LDSSize = 0;
-    bool FlatUsed = false;
-
-    // Number of SGPRs that meets number of waves per execution unit request.
-    uint32_t NumSGPRsForWavesPerEU = 0;
-
-    // Number of VGPRs that meets number of waves per execution unit request.
-    uint32_t NumVGPRsForWavesPerEU = 0;
-
-    // Fixed SGPR number used to hold wave scratch offset for entire kernel
-    // execution, or std::numeric_limits<uint16_t>::max() if the register is not
-    // used or not known.
-    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
-      std::numeric_limits<uint16_t>::max();
-
-    // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
-    // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
-    // is not used or not known.
-    uint16_t DebuggerPrivateSegmentBufferSGPR =
-      std::numeric_limits<uint16_t>::max();
-
-    // Whether there is recursion, dynamic allocas, indirect calls or some other
-    // reason there may be statically unknown stack usage.
-    bool DynamicCallStack = false;
-
-    // Bonus information for debugging.
-    bool VCCUsed = false;
-
-    SIProgramInfo() = default;
-  };
-
   SIProgramInfo CurrentProgramInfo;
   DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
 
@@ -123,13 +70,6 @@
                               unsigned &NumSGPR,
                               unsigned &NumVGPR) const;
 
-  AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
-      const MachineFunction &MF,
-      const SIProgramInfo &ProgramInfo) const;
-  AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
-      const MachineFunction &MF,
-      const SIProgramInfo &ProgramInfo) const;
-
   /// Emit register usage information so that the GPU driver
   /// can correctly setup the GPU state.
   void EmitProgramInfoSI(const MachineFunction &MF,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -207,9 +207,7 @@
   if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
     return;
 
-  HSAMetadataStream.emitKernel(MF->getFunction(),
-                               getHSACodeProps(*MF, CurrentProgramInfo),
-                               getHSADebugProps(*MF, CurrentProgramInfo));
+  HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@@ -1197,57 +1195,6 @@
   }
 }
 
-AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
-    const MachineFunction &MF,
-    const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
-  const Function &F = MF.getFunction();
-
-  // Avoid asserting on erroneous cases.
-  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
-    return HSACodeProps;
-
-  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
-  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
-  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
-  HSACodeProps.mKernargSegmentAlign =
-      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
-  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
-  HSACodeProps.mNumSGPRs = CurrentProgramInfo.NumSGPR;
-  HSACodeProps.mNumVGPRs = CurrentProgramInfo.NumVGPR;
-  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
-  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
-  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
-  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
-  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
-
-  return HSACodeProps;
-}
-
-AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
-    const MachineFunction &MF,
-    const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
-  if (!STM.debuggerSupported())
-    return HSADebugProps;
-
-  HSADebugProps.mDebuggerABIVersion.push_back(1);
-  HSADebugProps.mDebuggerABIVersion.push_back(0);
-
-  if (STM.debuggerEmitPrologue()) {
-    HSADebugProps.mPrivateSegmentBufferSGPR =
-        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
-        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-  }
-
-  return HSADebugProps;
-}
-
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                        unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h
@@ -28,6 +28,7 @@
 class Function;
 class MDNode;
 class Module;
+struct SIProgramInfo;
 class Type;
 
 namespace AMDGPU {
@@ -55,6 +56,18 @@
 
   std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
 
+  /// \returns the code properties of \p MF, or default-constructed properties
+  /// if \p MF is not an AMDGPU_KERNEL function.
+  Kernel::CodeProps::Metadata getHSACodeProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+
+  /// \returns the debug properties of \p MF, or default-constructed properties
+  /// if the subtarget does not support the debugger.
+  Kernel::DebugProps::Metadata getHSADebugProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+
   void emitVersion();
 
   void emitPrintf(const Module &Mod);
@@ -87,9 +100,7 @@
 
   void end();
 
-  void emitKernel(const Function &Func,
-                  const Kernel::CodeProps::Metadata &CodeProps,
-                  const Kernel::DebugProps::Metadata &DebugProps);
+  void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
 };
 
 } // end namespace HSAMD
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
@@ -14,6 +14,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIProgramInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
@@ -196,6 +200,57 @@
   return Dims;
 }
 
+Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
+    const MachineFunction &MF,
+    const SIProgramInfo &ProgramInfo) const {
+  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
+  const Function &F = MF.getFunction();
+
+  // Avoid asserting on erroneous cases.
+  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return HSACodeProps;
+
+  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
+  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
+  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
+  HSACodeProps.mKernargSegmentAlign =
+      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
+  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
+  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
+  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
+  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
+  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
+  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
+  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
+  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
+
+  return HSACodeProps;
+}
+
+Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
+    const MachineFunction &MF,
+    const SIProgramInfo &ProgramInfo) const {
+  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
+
+  if (!STM.debuggerSupported())
+    return HSADebugProps;
+
+  HSADebugProps.mDebuggerABIVersion.push_back(1);
+  HSADebugProps.mDebuggerABIVersion.push_back(0);
+
+  if (STM.debuggerEmitPrologue()) {
+    HSADebugProps.mPrivateSegmentBufferSGPR =
+        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
+    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
+        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
+  }
+
+  return HSADebugProps;
+}
+
 void MetadataStreamer::emitVersion() {
   auto &Version = HSAMetadata.mVersion;
 
@@ -408,10 +463,12 @@
   verify(HSAMetadataString);
 }
 
-void MetadataStreamer::emitKernel(
-    const Function &Func,
-    const Kernel::CodeProps::Metadata &CodeProps,
-    const Kernel::DebugProps::Metadata &DebugProps) {
+void MetadataStreamer::emitKernel(const MachineFunction &MF,
+                                  const SIProgramInfo &ProgramInfo) {
+  const Function &Func = MF.getFunction();
+  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
+  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
+
   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
     return;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
@@ -0,0 +1,80 @@
+//===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines struct to track resource usage for kernels and entry functions.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
+
+#include <cstdint>
+#include <limits>
+
+namespace llvm {
+
+/// Track resource usage for kernels / entry functions.
+struct SIProgramInfo {
+  // Fields set in PGM_RSRC1 pm4 packet.
+  uint32_t VGPRBlocks = 0;
+  uint32_t SGPRBlocks = 0;
+  uint32_t Priority = 0;
+  uint32_t FloatMode = 0;
+  uint32_t Priv = 0;
+  uint32_t DX10Clamp = 0;
+  uint32_t DebugMode = 0;
+  uint32_t IEEEMode = 0;
+  uint64_t ScratchSize = 0;
+
+  uint64_t ComputePGMRSrc1 = 0;
+
+  // Fields set in PGM_RSRC2 pm4 packet.
+  uint32_t LDSBlocks = 0;
+  uint32_t ScratchBlocks = 0;
+
+  uint64_t ComputePGMRSrc2 = 0;
+
+  uint32_t NumVGPR = 0;
+  uint32_t NumSGPR = 0;
+  uint32_t LDSSize = 0;
+  bool FlatUsed = false;
+
+  // Number of SGPRs that meets number of waves per execution unit request.
+  uint32_t NumSGPRsForWavesPerEU = 0;
+
+  // Number of VGPRs that meets number of waves per execution unit request.
+  uint32_t NumVGPRsForWavesPerEU = 0;
+
+  // Fixed SGPR number used to hold wave scratch offset for entire kernel
+  // execution, or std::numeric_limits<uint16_t>::max() if the register is not
+  // used or not known.
+  uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
+      std::numeric_limits<uint16_t>::max();
+
+  // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
+  // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
+  // is not used or not known.
+  uint16_t DebuggerPrivateSegmentBufferSGPR =
+      std::numeric_limits<uint16_t>::max();
+
+  // Whether there is recursion, dynamic allocas, indirect calls or some other
+  // reason there may be statically unknown stack usage.
+  bool DynamicCallStack = false;
+
+  // Bonus information for debugging.
+  bool VCCUsed = false;
+
+  SIProgramInfo() = default;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H