[AMDGPU] Emit per-kernel statistics in the runtime metadata

Adds a Kernel::Stats map to the AMDGPU runtime metadata. AMDGPUAsmPrinter now
fills an amd_kernel_code_t through getAmdKernelCode() (the former
EmitAmdKernelCodeT() minus the emission step) and hands it to the runtime
metadata streamer, whose new emitKernelStats() copies its fields into
Kernel::Stats::Metadata. The Stats map is only written to YAML when
Kernel::Stats::Metadata::notEmpty() returns true.

Review notes:
 * emitKernelStats() derives IsDebugEnabled from
   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED, the flag getAmdKernelCode() sets for
   STM.debuggerSupported(), instead of AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED.
 * NOTE(review): EmitFunctionBodyStart() only fills KernelCode when
   STM.isAmdCodeObjectV2(*MF) holds, but reads it whenever the OS is AMDHSA.
   Confirm that the second condition implies the first.
 * NOTE(review): the "#include <vector>" context line in AMDGPUAsmPrinter.h
   and the intra-line spacing of context lines are reconstructed; confirm
   against the tree before applying.

Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 
+#include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include <vector>
@@ -89,6 +90,8 @@
   };
 
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+  void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+                        const MachineFunction &MF) const;
   void findNumUsedRegistersSI(const MachineFunction &MF,
                               unsigned &NumSGPR,
                               unsigned &NumVGPR) const;
@@ -97,8 +100,6 @@
   /// can correctly setup the GPU state.
   void EmitProgramInfoR600(const MachineFunction &MF);
   void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
-  void EmitAmdKernelCodeT(const MachineFunction &MF,
-                          const SIProgramInfo &KernelInfo) const;
 
 public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -21,7 +21,6 @@
 #include "InstPrinter/AMDGPUInstPrinter.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
@@ -144,14 +143,18 @@
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
+  amd_kernel_code_t KernelCode;
   if (STM.isAmdCodeObjectV2(*MF)) {
     getSIProgramInfo(KernelInfo, *MF);
-    EmitAmdKernelCodeT(*MF, KernelInfo);
+    getAmdKernelCode(KernelCode, KernelInfo, *MF);
+
+    OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+    getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
   }
 
   if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
     return;
-  getTargetStreamer().EmitKernelRuntimeMetadata(*MF->getFunction());
+  getTargetStreamer().EmitKernelRuntimeMetadata(*MF->getFunction(), KernelCode);
 }
 
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
@@ -731,94 +734,90 @@
   }
 }
 
-void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
-                                         const SIProgramInfo &KernelInfo) const {
+void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+                                        const SIProgramInfo &KernelInfo,
+                                        const MachineFunction &MF) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  amd_kernel_code_t header;
 
-  AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
+  AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
 
-  header.compute_pgm_resource_registers =
+  Out.compute_pgm_resource_registers =
       KernelInfo.ComputePGMRSrc1 |
       (KernelInfo.ComputePGMRSrc2 << 32);
-  header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+  Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
 
-
-  AMD_HSA_BITS_SET(header.code_properties,
+  AMD_HSA_BITS_SET(Out.code_properties,
                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                    getElementByteSizeValue(STM.getMaxPrivateElementSize()));
 
   if (MFI->hasPrivateSegmentBuffer()) {
-    header.code_properties |=
+    Out.code_properties |=
       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
   }
 
   if (MFI->hasDispatchPtr())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (MFI->hasQueuePtr())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
 
   if (MFI->hasKernargSegmentPtr())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
 
   if (MFI->hasDispatchID())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
 
   if (MFI->hasFlatScratchInit())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
 
   // TODO: Private segment size
 
   if (MFI->hasGridWorkgroupCountX()) {
-    header.code_properties |=
+    Out.code_properties |=
       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
   }
 
   if (MFI->hasGridWorkgroupCountY()) {
-    header.code_properties |=
+    Out.code_properties |=
       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
   }
 
   if (MFI->hasGridWorkgroupCountZ()) {
-    header.code_properties |=
+    Out.code_properties |=
       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
   }
 
   if (MFI->hasDispatchPtr())
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (STM.debuggerSupported())
-    header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+    Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
 
   if (STM.isXNACKEnabled())
-    header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+    Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
   // FIXME: Should use getKernArgSize
-  header.kernarg_segment_byte_size =
+  Out.kernarg_segment_byte_size =
       STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
-  header.wavefront_sgpr_count = KernelInfo.NumSGPR;
-  header.workitem_vgpr_count = KernelInfo.NumVGPR;
-  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
-  header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-  header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
-  header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+  Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
+  Out.workitem_vgpr_count = KernelInfo.NumVGPR;
+  Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+  Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+  Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
+  Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  header.kernarg_segment_alignment = std::max((size_t)4,
+  Out.kernarg_segment_alignment = std::max((size_t)4,
       countTrailingZeros(MFI->getMaxKernArgAlign()));
 
   if (STM.debuggerEmitPrologue()) {
-    header.debug_wavefront_private_segment_offset_sgpr =
+    Out.debug_wavefront_private_segment_offset_sgpr =
       KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-    header.debug_private_segment_buffer_sgpr =
+    Out.debug_private_segment_buffer_sgpr =
       KernelInfo.DebuggerPrivateSegmentBufferSGPR;
   }
-
-  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-  getTargetStreamer().EmitAMDKernelCodeT(header);
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.h
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.h
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMETADATASTREAMER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMETADATASTREAMER_H
 
+#include "AMDKernelCodeT.h"
 #include "Shared/AMDGPURuntimeMetadata.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
@@ -79,6 +80,8 @@
                      StringRef AccQual = "", StringRef Name = "",
                      StringRef TypeName = "");
 
+  void emitKernelStats(const amd_kernel_code_t &KernelCode);
+
 public:
   Streamer() = default;
   ~Streamer() = default;
@@ -87,7 +90,7 @@
 
   void end() {}
 
-  void emitKernel(const Function &Func);
+  void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
 
   ErrorOr<std::string> toYamlString();
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.cpp
@@ -181,6 +181,50 @@
 };
 
 template <>
+struct MappingTraits<Kernel::Stats::Metadata> {
+  static void mapping(IO &YIO, Kernel::Stats::Metadata &MD) {
+    YIO.mapOptional(Kernel::Stats::Key::WavefrontNumSGPRs,
+                    MD.WavefrontNumSGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::WorkitemNumVGPRs,
+                    MD.WorkitemNumVGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::WorkitemPrivateSegmentByteSize,
+                    MD.WorkitemPrivateSegmentByteSize, uint32_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::WorkgroupGroupSegmentByteSize,
+                    MD.WorkgroupGroupSegmentByteSize, uint32_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::GdsSegmentByteSize,
+                    MD.GdsSegmentByteSize, uint32_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::KernargSegmentByteSize,
+                    MD.KernargSegmentByteSize, uint64_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::WorkgroupNumFbarriers,
+                    MD.WorkgroupNumFbarriers, uint32_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::ReservedNumVGPRs,
+                    MD.ReservedNumVGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::ReservedFirstVGPR,
+                    MD.ReservedFirstVGPR, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::ReservedNumSGPRs,
+                    MD.ReservedNumSGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::ReservedFirstSGPR,
+                    MD.ReservedFirstSGPR, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::DebugWavefrontPrivateSegmentOffsetSGPR,
+                    MD.DebugWavefrontPrivateSegmentOffsetSGPR, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::DebugPrivateSegmentBufferSGPR,
+                    MD.DebugPrivateSegmentBufferSGPR, uint16_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::KernargSegmentAlign,
+                    MD.KernargSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::GroupSegmentAlign,
+                    MD.GroupSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::PrivateSegmentAlign,
+                    MD.PrivateSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::WavefrontSize,
+                    MD.WavefrontSize, uint8_t(0));
+    YIO.mapOptional(Kernel::Stats::Key::CallConvention,
+                    MD.CallConvention, int32_t(-1));
+    YIO.mapOptional(Kernel::Stats::Key::IsDebugEnabled,
+                    MD.IsDebugEnabled, false);
+  }
+};
+
+template <>
 struct MappingTraits<Kernel::Metadata> {
   static void mapping(IO &YIO, Kernel::Metadata &MD) {
     YIO.mapRequired(Kernel::Key::Name, MD.Name);
@@ -191,6 +235,8 @@
     YIO.mapOptional(Kernel::Key::Attrs, MD.Attrs);
     if (!MD.Args.empty() || !YIO.outputting())
       YIO.mapOptional(Kernel::Key::Args, MD.Args);
+    if (MD.Stats.notEmpty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::Stats, MD.Stats);
   }
 };
 
@@ -562,13 +608,43 @@
   Arg.TypeName = TypeName;
 }
 
+void Streamer::emitKernelStats(const amd_kernel_code_t &KernelCode) {
+  auto &Stats = Program.Kernels.back().Stats;
+
+  Stats.WavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
+  Stats.WorkitemNumVGPRs = KernelCode.workitem_vgpr_count;
+  Stats.WorkitemPrivateSegmentByteSize =
+      KernelCode.workitem_private_segment_byte_size;
+  Stats.WorkgroupGroupSegmentByteSize =
+      KernelCode.workgroup_group_segment_byte_size;
+  Stats.GdsSegmentByteSize = KernelCode.gds_segment_byte_size;
+  Stats.KernargSegmentByteSize = KernelCode.kernarg_segment_byte_size;
+  Stats.WorkgroupNumFbarriers = KernelCode.workgroup_fbarrier_count;
+  Stats.ReservedNumVGPRs = KernelCode.reserved_vgpr_count;
+  Stats.ReservedFirstVGPR = KernelCode.reserved_vgpr_first;
+  Stats.ReservedNumSGPRs = KernelCode.reserved_sgpr_count;
+  Stats.ReservedFirstSGPR = KernelCode.reserved_sgpr_first;
+  Stats.DebugWavefrontPrivateSegmentOffsetSGPR =
+      KernelCode.debug_wavefront_private_segment_offset_sgpr;
+  Stats.DebugPrivateSegmentBufferSGPR =
+      KernelCode.debug_private_segment_buffer_sgpr;
+  Stats.KernargSegmentAlign = KernelCode.kernarg_segment_alignment;
+  Stats.GroupSegmentAlign = KernelCode.group_segment_alignment;
+  Stats.PrivateSegmentAlign = KernelCode.private_segment_alignment;
+  Stats.WavefrontSize = KernelCode.wavefront_size;
+  Stats.CallConvention = KernelCode.call_convention;
+  Stats.IsDebugEnabled =
+      KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+}
+
 void Streamer::begin(const FeatureBitset &Features, const Module &Mod) {
   emitVersion();
   emitIsa(Features);
   emitPrintf(Mod);
 }
 
-void Streamer::emitKernel(const Function &Func) {
+void Streamer::emitKernel(const Function &Func,
+                          const amd_kernel_code_t &KernelCode) {
   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
     return;
 
@@ -579,6 +655,7 @@
   emitKernelLanguage(Func);
   emitKernelAttrs(Func);
   emitKernelArgs(Func);
+  emitKernelStats(KernelCode);
 }
 
 ErrorOr<std::string> Streamer::toYamlString() {
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -52,7 +52,8 @@
   virtual void EmitStartOfRuntimeMetadata(const FeatureBitset &Features,
                                           const Module &Mod);
 
-  virtual void EmitKernelRuntimeMetadata(const Function &Func);
+  virtual void EmitKernelRuntimeMetadata(const Function &Func,
+                                         const amd_kernel_code_t &KernelCode);
 
   virtual void EmitEndOfRuntimeMetadata(const FeatureBitset &Features);
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -48,8 +48,10 @@
   RuntimeMetadataStreamer.begin(Features, Mod);
 }
 
-void AMDGPUTargetStreamer::EmitKernelRuntimeMetadata(const Function &Func) {
-  RuntimeMetadataStreamer.emitKernel(Func);
+void AMDGPUTargetStreamer::EmitKernelRuntimeMetadata(
+    const Function &Func,
+    const amd_kernel_code_t &KernelCode) {
+  RuntimeMetadataStreamer.emitKernel(Func, KernelCode);
 }
 
 void AMDGPUTargetStreamer::EmitEndOfRuntimeMetadata(
Index: lib/Target/AMDGPU/Shared/AMDGPURuntimeMetadata.h
===================================================================
--- lib/Target/AMDGPU/Shared/AMDGPURuntimeMetadata.h
+++ lib/Target/AMDGPU/Shared/AMDGPURuntimeMetadata.h
@@ -266,6 +266,107 @@
 
 } // end namespace Arg
 
+
+//===----------------------------------------------------------------------===//
+// Kernel Statistics Metadata.
+//===----------------------------------------------------------------------===//
+namespace Stats {
+
+namespace Key {
+
+/// \brief Key for Kernel::Stats::Metadata::WavefrontNumSGPRs.
+constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
+/// \brief Key for Kernel::Stats::Metadata::WorkitemNumVGPRs.
+constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
+/// \brief Key for
+/// Kernel::Stats::Metadata::WorkitemPrivateSegmentByteSize.
+constexpr char WorkitemPrivateSegmentByteSize[] =
+    "WorkitemPrivateSegmentByteSize";
+/// \brief Key for
+/// Kernel::Stats::Metadata::WorkgroupGroupSegmentByteSize.
+constexpr char WorkgroupGroupSegmentByteSize[] =
+    "WorkgroupGroupSegmentByteSize";
+/// \brief Key for
+/// Kernel::Stats::Metadata::GdsSegmentByteSize.
+constexpr char GdsSegmentByteSize[] =
+    "GdsSegmentByteSize";
+/// \brief Key for
+/// Kernel::Stats::Metadata::KernargSegmentByteSize.
+constexpr char KernargSegmentByteSize[] =
+    "KernargSegmentByteSize";
+/// \brief Key for Kernel::Stats::Metadata::WorkgroupNumFbarriers.
+constexpr char WorkgroupNumFbarriers[] = "WorkgroupNumFbarriers";
+/// \brief Key for Kernel::Stats::Metadata::ReservedNumVGPRs.
+constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
+/// \brief Key for Kernel::Stats::Metadata::ReservedFirstVGPR.
+constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
+/// \brief Key for Kernel::Stats::Metadata::ReservedNumSGPRs.
+constexpr char ReservedNumSGPRs[] = "ReservedNumSGPRs";
+/// \brief Key for Kernel::Stats::Metadata::ReservedFirstSGPR.
+constexpr char ReservedFirstSGPR[] = "ReservedFirstSGPR";
+/// \brief Key for
+/// Kernel::Stats::Metadata::DebugWavefrontPrivateSegmentOffsetSGPR.
+constexpr char DebugWavefrontPrivateSegmentOffsetSGPR[] =
+    "DebugWavefrontPrivateSegmentOffsetSGPR";
+/// \brief Key for
+/// Kernel::Stats::Metadata::DebugPrivateSegmentBufferSGPR.
+constexpr char DebugPrivateSegmentBufferSGPR[] =
+    "DebugPrivateSegmentBufferSGPR";
+/// \brief Key for Kernel::Stats::Metadata::KernargSegmentAlign.
+constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
+/// \brief Key for Kernel::Stats::Metadata::GroupSegmentAlign.
+constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
+/// \brief Key for Kernel::Stats::Metadata::PrivateSegmentAlign.
+constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
+/// \brief Key for Kernel::Stats::Metadata::WavefrontSize.
+constexpr char WavefrontSize[] = "WavefrontSize";
+/// \brief Key for Kernel::Stats::Metadata::CallConvention.
+constexpr char CallConvention[] = "CallConvention";
+/// \brief Key for Kernel::Stats::Metadata::IsDebugEnabled.
+constexpr char IsDebugEnabled[] = "IsDebugEnabled";
+
+} // end namespace Key
+
+/// \brief In-memory representation of kernel statistics metadata.
+struct Metadata final {
+  uint16_t WavefrontNumSGPRs = 0;
+  uint16_t WorkitemNumVGPRs = 0;
+  uint32_t WorkitemPrivateSegmentByteSize = 0;
+  uint32_t WorkgroupGroupSegmentByteSize = 0;
+  uint32_t GdsSegmentByteSize = 0;
+  uint64_t KernargSegmentByteSize = 0;
+  uint32_t WorkgroupNumFbarriers = 0;
+  uint16_t ReservedNumVGPRs = 0;
+  uint16_t ReservedFirstVGPR = 0;
+  uint16_t ReservedNumSGPRs = 0;
+  uint16_t ReservedFirstSGPR = 0;
+  uint16_t DebugWavefrontPrivateSegmentOffsetSGPR = 0;
+  uint16_t DebugPrivateSegmentBufferSGPR = 0;
+  uint8_t KernargSegmentAlign = 0;
+  uint8_t GroupSegmentAlign = 0;
+  uint8_t PrivateSegmentAlign = 0;
+  uint8_t WavefrontSize = 0;
+  int32_t CallConvention = -1;
+  bool IsDebugEnabled = false;
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \returns True if kernel statistics metadata is not empty, false otherwise.
+  bool notEmpty() const {
+    return WavefrontNumSGPRs || WorkitemNumVGPRs ||
+      WorkitemPrivateSegmentByteSize || WorkgroupGroupSegmentByteSize ||
+      GdsSegmentByteSize || KernargSegmentByteSize || WorkgroupNumFbarriers ||
+      ReservedNumVGPRs || ReservedFirstVGPR || ReservedNumSGPRs ||
+      ReservedFirstSGPR || DebugWavefrontPrivateSegmentOffsetSGPR ||
+      DebugPrivateSegmentBufferSGPR || KernargSegmentAlign ||
+      GroupSegmentAlign || PrivateSegmentAlign || WavefrontSize ||
+      CallConvention != -1 || IsDebugEnabled;
+  }
+};
+
+} // end namespace Stats
+
 namespace Key {
 
 /// \brief Key for Kernel::Metadata::Name.
@@ -278,6 +379,8 @@
 constexpr char Attrs[] = "Attrs";
 /// \brief Key for Kernel::Metadata::Args.
 constexpr char Args[] = "Args";
+/// \brief Key for Kernel::Metadata::Stats.
+constexpr char Stats[] = "Stats";
 
 } // end namespace Key
 
@@ -293,6 +396,8 @@
   Attrs::Metadata Attrs = Attrs::Metadata();
   /// \brief Arguments metadata. Optional.
   std::vector<Arg::Metadata> Args = std::vector<Arg::Metadata>();
+  /// \brief Statistics metadata. Optional.
+  Stats::Metadata Stats = Stats::Metadata();
 
   /// \brief Default constructor.
   Metadata() = default;
Index: test/CodeGen/AMDGPU/runtime-metadata-from-llvm-ir-full.ll
===================================================================
--- test/CodeGen/AMDGPU/runtime-metadata-from-llvm-ir-full.ll
+++ test/CodeGen/AMDGPU/runtime-metadata-from-llvm-ir-full.ll
@@ -1274,8 +1274,8 @@
 ; NOTES-NEXT: Owner Data size Description
 ; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
 ; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
-; GFX700: AMD 0x00007834 Unknown note type: (0x00000009)
-; GFX800: AMD 0x00007834 Unknown note type: (0x00000009)
-; GFX900: AMD 0x00007835 Unknown note type: (0x00000009)
+; GFX700: AMD 0x00009288 Unknown note type: (0x00000009)
+; GFX800: AMD 0x000092a7 Unknown note type: (0x00000009)
+; GFX900: AMD 0x00009289 Unknown note type: (0x00000009)
 
 ; PARSER: AMDGPU Runtime Metadata Parser Test: PASS
Index: test/CodeGen/AMDGPU/runtime-metadata-kernel-stats.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/runtime-metadata-kernel-stats.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+; CHECK: ---
+; CHECK: Version: [ 3, 0 ]
+; CHECK: Isa
+; CHECK: Kernels:
+; CHECK: - Name: test
+; CHECK: Stats:
+; CHECK: WavefrontNumSGPRs: 6
+; CHECK: WorkitemNumVGPRs: 4
+; CHECK: KernargSegmentByteSize: 24
+; CHECK: KernargSegmentAlign: 4
+; CHECK: GroupSegmentAlign: 4
+; CHECK: PrivateSegmentAlign: 4
+; CHECK: WavefrontSize: 6
+define amdgpu_kernel void @test(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}