Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -89,6 +90,8 @@ }; void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const; + void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, + const MachineFunction &MF) const; void findNumUsedRegistersSI(const MachineFunction &MF, unsigned &NumSGPR, unsigned &NumVGPR) const; @@ -97,8 +100,6 @@ /// can correctly setup the GPU state. void EmitProgramInfoR600(const MachineFunction &MF); void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo); - void EmitAmdKernelCodeT(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) const; public: explicit AMDGPUAsmPrinter(TargetMachine &TM, Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -21,7 +21,6 @@ #include "InstPrinter/AMDGPUInstPrinter.h" #include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" -#include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" @@ -145,14 +144,19 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget(); SIProgramInfo KernelInfo; + amd_kernel_code_t KernelCode; if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); - EmitAmdKernelCodeT(*MF, KernelInfo); + getAmdKernelCode(KernelCode, KernelInfo, *MF); + + OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + getTargetStreamer().EmitAMDKernelCodeT(KernelCode); } if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction()); + getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(), + KernelCode); } void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { @@ -732,94 +736,88 @@ } } -void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) const { +void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, + const SIProgramInfo &KernelInfo, + const MachineFunction &MF) const { const SIMachineFunctionInfo *MFI = MF.getInfo(); const SISubtarget &STM = MF.getSubtarget(); - amd_kernel_code_t header; - AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); - header.compute_pgm_resource_registers = + Out.compute_pgm_resource_registers = KernelInfo.ComputePGMRSrc1 | (KernelInfo.ComputePGMRSrc2 << 32); - header.code_properties = AMD_CODE_PROPERTY_IS_PTR64; - + Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64; - AMD_HSA_BITS_SET(header.code_properties, + AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize())); if (MFI->hasPrivateSegmentBuffer()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } if (MFI->hasDispatchPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (MFI->hasQueuePtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; if (MFI->hasKernargSegmentPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; if (MFI->hasDispatchID()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; if (MFI->hasFlatScratchInit()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; - - // TODO: Private segment size + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; if (MFI->hasGridWorkgroupCountX()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; } if (MFI->hasGridWorkgroupCountY()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; } if (MFI->hasGridWorkgroupCountZ()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; } if (MFI->hasDispatchPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (STM.debuggerSupported()) - header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; + Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; if (STM.isXNACKEnabled()) - header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; + Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; // FIXME: Should use getKernArgSize - header.kernarg_segment_byte_size = + Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); - header.wavefront_sgpr_count = KernelInfo.NumSGPR; - header.workitem_vgpr_count = KernelInfo.NumVGPR; - header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; - header.workgroup_group_segment_byte_size = KernelInfo.LDSSize; - header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; - header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + Out.wavefront_sgpr_count = KernelInfo.NumSGPR; + Out.workitem_vgpr_count = KernelInfo.NumVGPR; + Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize; + Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize; + Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; + Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. - header.kernarg_segment_alignment = std::max((size_t)4, + Out.kernarg_segment_alignment = std::max((size_t)4, countTrailingZeros(MFI->getMaxKernArgAlign())); if (STM.debuggerEmitPrologue()) { - header.debug_wavefront_private_segment_offset_sgpr = + Out.debug_wavefront_private_segment_offset_sgpr = KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - header.debug_private_segment_buffer_sgpr = + Out.debug_private_segment_buffer_sgpr = KernelInfo.DebuggerPrivateSegmentBufferSGPR; } - - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - getTargetStreamer().EmitAMDKernelCodeT(header); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h @@ -274,6 +274,81 @@ } // end namespace Arg +//===----------------------------------------------------------------------===// +// Kernel Code Properties Metadata. +//===----------------------------------------------------------------------===// +namespace CodeProps { + +namespace Key { +/// \brief Key for Kernel::CodeProps::mKernargSegmentSize. +constexpr char KernargSegmentSize[] = "KernargSegmentSize"; +/// \brief Key for Kernel::CodeProps::mWorkgroupGroupSegmentSize. +constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize"; +/// \brief Key for Kernel::CodeProps::mWorkitemPrivateSegmentSize. +constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize"; +/// \brief Key for Kernel::CodeProps::mWavefrontNumSGPRs. +constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs"; +/// \brief Key for Kernel::CodeProps::mWorkitemNumVGPRs. +constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs"; +/// \brief Key for Kernel::CodeProps::mKernargSegmentAlign. +constexpr char KernargSegmentAlign[] = "KernargSegmentAlign"; +/// \brief Key for Kernel::CodeProps::mGroupSegmentAlign. +constexpr char GroupSegmentAlign[] = "GroupSegmentAlign"; +/// \brief Key for Kernel::CodeProps::mPrivateSegmentAlign. +constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign"; +/// \brief Key for Kernel::CodeProps::mWavefrontSize. +constexpr char WavefrontSize[] = "WavefrontSize"; +} // end namespace Key + +/// \brief In-memory representation of kernel code properties metadata. +struct Metadata final { + /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory + /// holds the values of the arguments to the kernel. Optional. + uint64_t mKernargSegmentSize = 0; + /// \brief Size in bytes of the group segment memory required by a workgroup. + /// This value does not include any dynamically allocated group segment memory + /// that may be added when the kernel is dispatched. Optional. + uint32_t mWorkgroupGroupSegmentSize = 0; + /// \brief Size in bytes of the private segment memory required by a workitem. + /// Private segment memory includes arg, spill and private segments. Optional. + uint32_t mWorkitemPrivateSegmentSize = 0; + /// \brief Total number of SGPRs used by a wavefront. Optional. + uint16_t mWavefrontNumSGPRs = 0; + /// \brief Total number of VGPRs used by a workitem. Optional. + uint16_t mWorkitemNumVGPRs = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// kernarg memory segment. Expressed as a power of two. Optional. + uint8_t mKernargSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// group memory segment. Expressed as a power of two. Optional. + uint8_t mGroupSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// private memory segment. Expressed as a power of two. Optional. + uint8_t mPrivateSegmentAlign = 0; + /// \brief Wavefront size. Expressed as a power of two. Optional. + uint8_t mWavefrontSize = 0; + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel code properties metadata is empty, false + /// otherwise. + bool empty() const { + return !notEmpty(); + } + + /// \returns True if kernel code properties metadata is not empty, false + /// otherwise. + bool notEmpty() const { + return mKernargSegmentSize || mWorkgroupGroupSegmentSize || + mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs || + mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign || + mPrivateSegmentAlign || mWavefrontSize; + } +}; + +} // end namespace CodeProps + namespace Key { /// \brief Key for Kernel::Metadata::mName. constexpr char Name[] = "Name"; @@ -285,6 +360,8 @@ constexpr char Attrs[] = "Attrs"; /// \brief Key for Kernel::Metadata::mArgs. constexpr char Args[] = "Args"; +/// \brief Key for Kernel::Metadata::mCodeProps. +constexpr char CodeProps[] = "CodeProps"; } // end namespace Key /// \brief In-memory representation of kernel metadata. @@ -299,6 +376,8 @@ Attrs::Metadata mAttrs = Attrs::Metadata(); /// \brief Arguments metadata. Optional. std::vector mArgs = std::vector(); + /// \brief Code properties metadata. Optional. + CodeProps::Metadata mCodeProps = CodeProps::Metadata(); /// \brief Default constructor. Metadata() = default; Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPUCodeObjectMetadata.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorOr.h" @@ -72,6 +73,9 @@ StringRef TypeQual = "", StringRef BaseTypeName = "", StringRef AccQual = "", StringRef Name = "", StringRef TypeName = ""); + + void emitKernelCodeProps(const amd_kernel_code_t &KernelCode); + public: MetadataStreamer() = default; ~MetadataStreamer() = default; @@ -80,7 +84,7 @@ void end() {} - void emitKernel(const Function &Func); + void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode); ErrorOr toYamlString(); Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -154,6 +154,30 @@ }; template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize, uint64_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, + MD.mWorkgroupGroupSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, + MD.mWorkitemPrivateSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, + MD.mWavefrontNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, + MD.mWorkitemNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, + MD.mGroupSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, + MD.mPrivateSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize, uint8_t(0)); + } +}; + +template <> struct MappingTraits { static void mapping(IO &YIO, Kernel::Metadata &MD) { YIO.mapRequired(Kernel::Key::Name, MD.mName); @@ -164,6 +188,8 @@ YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); if (!MD.mArgs.empty() || !YIO.outputting()) YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); } }; @@ -531,13 +557,31 @@ Arg.mTypeName = TypeName; } +void MetadataStreamer::emitKernelCodeProps( + const amd_kernel_code_t &KernelCode) { + auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps; + + CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size; + CodeProps.mWorkgroupGroupSegmentSize = + KernelCode.workgroup_group_segment_byte_size; + CodeProps.mWorkitemPrivateSegmentSize = + KernelCode.workitem_private_segment_byte_size; + CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count; + CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count; + CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment; + CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment; + CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment; + CodeProps.mWavefrontSize = KernelCode.wavefront_size; +} + void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) { emitVersion(); emitIsa(Features); emitPrintf(Mod); } -void MetadataStreamer::emitKernel(const Function &Func) { +void MetadataStreamer::emitKernel(const Function &Func, + const amd_kernel_code_t &KernelCode) { if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) return; @@ -548,6 +592,7 @@ emitKernelLanguage(Func); emitKernelAttrs(Func); emitKernelArgs(Func); + emitKernelCodeProps(KernelCode); } ErrorOr MetadataStreamer::toYamlString() { Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -52,7 +52,8 @@ virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features, const Module &Mod); - virtual void EmitKernelCodeObjectMetadata(const Function &Func); + virtual void EmitKernelCodeObjectMetadata( + const Function &Func, const amd_kernel_code_t &KernelCode); virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features); Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -47,8 +47,9 @@ CodeObjectMetadataStreamer.begin(Features, Mod); } -void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) { - CodeObjectMetadataStreamer.emitKernel(Func); +void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata( + const Function &Func, const amd_kernel_code_t &KernelCode) { + CodeObjectMetadataStreamer.emitKernel(Func, KernelCode); } void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata( Index: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll +++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll @@ -1274,8 +1274,8 @@ ; NOTES-NEXT: Owner Data size Description ; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001) ; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003) -; GFX700: AMD 0x000078b2 Unknown note type: (0x0000000a) -; GFX800: AMD 0x000078b2 Unknown note type: (0x0000000a) -; GFX900: AMD 0x000078b3 Unknown note type: (0x0000000a) +; GFX700: AMD 0x0000928a Unknown note type: (0x0000000a) +; GFX800: AMD 0x000092a9 Unknown note type: (0x0000000a) +; GFX900: AMD 0x0000928b Unknown note type: (0x0000000a) ; PARSER: AMDGPU Code Object Metadata Parser Test: PASS Index: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll +++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] + +; CHECK: Kernels: +; CHECK: - Name: test +; CHECK: CodeProps: +; CHECK: KernargSegmentSize: 24 +; GFX700: WavefrontNumSGPRs: 6 +; GFX800: WavefrontNumSGPRs: 96 +; GFX900: WavefrontNumSGPRs: 6 +; GFX700: WorkitemNumVGPRs: 4 +; GFX800: WorkitemNumVGPRs: 6 +; GFX900: WorkitemNumVGPRs: 6 +; CHECK: KernargSegmentAlign: 4 +; CHECK: GroupSegmentAlign: 4 +; CHECK: PrivateSegmentAlign: 4 +; CHECK: WavefrontSize: 6 +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} Index: llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-code-props.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-code-props.s +++ llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-code-props.s @@ -0,0 +1,24 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_code_object_metadata +// CHECK: Version: [ 1, 0 ] +// CHECK: Kernels: +// CHECK: - Name: test_kernel +// CHECK: CodeProps: +// CHECK: KernargSegmentSize: 24 +// CHECK: WorkitemPrivateSegmentSize: 16 +// CHECK: WavefrontNumSGPRs: 6 +// CHECK: WorkitemNumVGPRs: 12 +.amdgpu_code_object_metadata + Version: [ 1, 0 ] + Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + Kernels: + - Name: test_kernel + CodeProps: + KernargSegmentSize: 24 + WorkitemPrivateSegmentSize: 16 + WavefrontNumSGPRs: 6 + WorkitemNumVGPRs: 12 +.end_amdgpu_code_object_metadata