Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -26,6 +26,7 @@ namespace llvm { +class AMDGPUTargetStreamer; class MCOperand; class AMDGPUAsmPrinter final : public AsmPrinter { @@ -103,10 +104,14 @@ explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); - bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override; + const MCSubtargetInfo* getSTI() const; + + AMDGPUTargetStreamer& getTargetStreamer() const; + + bool runOnMachineFunction(MachineFunction &MF) override; + /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated /// pseudo lowering. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; @@ -132,6 +137,8 @@ void EmitStartOfAsmFile(Module &M) override; + void EmitEndOfAsmFile(Module &M) override; + bool isBlockOnlyReachableByFallthrough( const MachineBasicBlock *MBB) const override; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -99,23 +99,33 @@ return "AMDGPU Assembly Printer"; } +const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const { + return TM.getMCSubtargetInfo(); +} + +AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const { + return static_cast(*OutStreamer->getTargetStreamer()); +} + void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); - TS->EmitDirectiveHSACodeObjectVersion(2, 1); + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); + getTargetStreamer().EmitDirectiveHSACodeObjectISA( + ISA.Major, 
ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + getTargetStreamer().EmitStartOfCodeObjectMetadata( + getSTI()->getFeatureBits(), M); +} - const MCSubtargetInfo *STI = TM.getMCSubtargetInfo(); - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits()); - TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, - "AMD", "AMDGPU"); +void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; - // Emit runtime metadata. - TS->EmitRuntimeMetadata(STI->getFeatureBits(), M); + getTargetStreamer().EmitEndOfCodeObjectMetadata(getSTI()->getFeatureBits()); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -132,7 +142,6 @@ return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64); } - void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget(); SIProgramInfo KernelInfo; @@ -140,17 +149,20 @@ getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction()); } void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); SmallString<128> SymbolName; getNameWithPrefix(SymbolName, MF->getFunction()), - TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); + getTargetStreamer().EmitAMDGPUSymbolType( + SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } AsmPrinter::EmitFunctionEntryLabel(); @@ -806,11 +818,8 @@ KernelInfo.DebuggerPrivateSegmentBufferSGPR; } - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - TS->EmitAMDKernelCodeT(header); + 
getTargetStreamer().EmitAMDKernelCodeT(header); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, Index: lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- lib/Target/AMDGPU/AMDGPUPTNote.h +++ lib/Target/AMDGPU/AMDGPUPTNote.h @@ -33,9 +33,7 @@ NT_AMDGPU_HSA_PRODUCER = 4, NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, NT_AMDGPU_HSA_EXTENSION = 6, - NT_AMDGPU_HSA_RUNTIME_METADATA_V_1 = 7, // deprecated since 12/14/16. - NT_AMDGPU_HSA_RUNTIME_METADATA_V_2 = 8, - NT_AMDGPU_HSA_RUNTIME_METADATA = NT_AMDGPU_HSA_RUNTIME_METADATA_V_2, + NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10, NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 }; Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ /dev/null @@ -1,290 +0,0 @@ -//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// Enums and structure types used by runtime metadata. -/// -/// Runtime requests certain information (metadata) about kernels to be able -/// to execute the kernels and answer the queries about the kernels. -/// The metadata is represented as a note element in the .note ELF section of a -/// binary (code object). The desc field of the note element is a YAML string -/// consisting of key-value pairs. Each key is a string. Each value can be -/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps. -/// At the beginning of the YAML string is the module level YAML map. A -/// kernel-level YAML map is in the amd.Kernels sequence. A -/// kernel-argument-level map is in the amd.Args sequence. 
-/// -/// The format should be kept backward compatible. New enum values and bit -/// fields should be appended at the end. It is suggested to bump up the -/// revision number whenever the format changes and document the change -/// in the revision in this header. -/// -// -//===----------------------------------------------------------------------===// -// -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H - -#include -#include -#include - -namespace AMDGPU { -namespace RuntimeMD { - - // Version and revision of runtime metadata - const unsigned char MDVersion = 2; - const unsigned char MDRevision = 1; - - // Name of keys for runtime metadata. - namespace KeyName { - - // Runtime metadata version - const char MDVersion[] = "amd.MDVersion"; - - // Instruction set architecture information - const char IsaInfo[] = "amd.IsaInfo"; - // Wavefront size - const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize"; - // Local memory size in bytes - const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize"; - // Number of execution units per compute unit - const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU"; - // Maximum number of waves per execution unit - const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU"; - // Maximum flat work group size - const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize"; - // SGPR allocation granularity - const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule"; - // Total number of SGPRs - const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs"; - // Addressable number of SGPRs - const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs"; - // VGPR allocation granularity - const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule"; - // Total number of VGPRs - const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs"; - // Addressable number of VGPRs - const char 
IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs"; - - // Language - const char Language[] = "amd.Language"; - // Language version - const char LanguageVersion[] = "amd.LanguageVersion"; - - // Kernels - const char Kernels[] = "amd.Kernels"; - // Kernel name - const char KernelName[] = "amd.KernelName"; - // Kernel arguments - const char Args[] = "amd.Args"; - // Kernel argument size in bytes - const char ArgSize[] = "amd.ArgSize"; - // Kernel argument alignment - const char ArgAlign[] = "amd.ArgAlign"; - // Kernel argument type name - const char ArgTypeName[] = "amd.ArgTypeName"; - // Kernel argument name - const char ArgName[] = "amd.ArgName"; - // Kernel argument kind - const char ArgKind[] = "amd.ArgKind"; - // Kernel argument value type - const char ArgValueType[] = "amd.ArgValueType"; - // Kernel argument address qualifier - const char ArgAddrQual[] = "amd.ArgAddrQual"; - // Kernel argument access qualifier - const char ArgAccQual[] = "amd.ArgAccQual"; - // Kernel argument is const qualified - const char ArgIsConst[] = "amd.ArgIsConst"; - // Kernel argument is restrict qualified - const char ArgIsRestrict[] = "amd.ArgIsRestrict"; - // Kernel argument is volatile qualified - const char ArgIsVolatile[] = "amd.ArgIsVolatile"; - // Kernel argument is pipe qualified - const char ArgIsPipe[] = "amd.ArgIsPipe"; - // Required work group size - const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; - // Work group size hint - const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; - // Vector type hint - const char VecTypeHint[] = "amd.VecTypeHint"; - // Kernel index for device enqueue - const char KernelIndex[] = "amd.KernelIndex"; - // No partial work groups - const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; - // Prinf function call information - const char PrintfInfo[] = "amd.PrintfInfo"; - // The actual kernel argument access qualifier - const char ArgActualAcc[] = "amd.ArgActualAcc"; - // Alignment of pointee type - const char 
ArgPointeeAlign[] = "amd.ArgPointeeAlign"; - - } // end namespace KeyName - - namespace KernelArg { - - enum Kind : uint8_t { - ByValue = 0, - GlobalBuffer = 1, - DynamicSharedPointer = 2, - Sampler = 3, - Image = 4, - Pipe = 5, - Queue = 6, - HiddenGlobalOffsetX = 7, - HiddenGlobalOffsetY = 8, - HiddenGlobalOffsetZ = 9, - HiddenNone = 10, - HiddenPrintfBuffer = 11, - HiddenDefaultQueue = 12, - HiddenCompletionAction = 13, - }; - - enum ValueType : uint16_t { - Struct = 0, - I8 = 1, - U8 = 2, - I16 = 3, - U16 = 4, - F16 = 5, - I32 = 6, - U32 = 7, - F32 = 8, - I64 = 9, - U64 = 10, - F64 = 11, - }; - - // Avoid using 'None' since it conflicts with a macro in X11 header file. - enum AccessQualifer : uint8_t { - AccNone = 0, - ReadOnly = 1, - WriteOnly = 2, - ReadWrite = 3, - }; - - enum AddressSpaceQualifer : uint8_t { - Private = 0, - Global = 1, - Constant = 2, - Local = 3, - Generic = 4, - Region = 5, - }; - - } // end namespace KernelArg - - // Invalid values are used to indicate an optional key should not be emitted. - const uint8_t INVALID_ADDR_QUAL = 0xff; - const uint8_t INVALID_ACC_QUAL = 0xff; - const uint32_t INVALID_KERNEL_INDEX = ~0U; - - namespace KernelArg { - - // In-memory representation of kernel argument information. - struct Metadata { - uint32_t Size = 0; - uint32_t Align = 0; - uint32_t PointeeAlign = 0; - uint8_t Kind = 0; - uint16_t ValueType = 0; - std::string TypeName; - std::string Name; - uint8_t AddrQual = INVALID_ADDR_QUAL; - uint8_t AccQual = INVALID_ACC_QUAL; - uint8_t IsVolatile = 0; - uint8_t IsConst = 0; - uint8_t IsRestrict = 0; - uint8_t IsPipe = 0; - - Metadata() = default; - }; - - } // end namespace KernelArg - - namespace Kernel { - - // In-memory representation of kernel information. 
- struct Metadata { - std::string Name; - std::string Language; - std::vector LanguageVersion; - std::vector ReqdWorkGroupSize; - std::vector WorkGroupSizeHint; - std::string VecTypeHint; - uint32_t KernelIndex = INVALID_KERNEL_INDEX; - uint8_t NoPartialWorkGroups = 0; - std::vector Args; - - Metadata() = default; - }; - - } // end namespace Kernel - - namespace IsaInfo { - - /// \brief In-memory representation of instruction set architecture - /// information. - struct Metadata { - /// \brief Wavefront size. - unsigned WavefrontSize = 0; - /// \brief Local memory size in bytes. - unsigned LocalMemorySize = 0; - /// \brief Number of execution units per compute unit. - unsigned EUsPerCU = 0; - /// \brief Maximum number of waves per execution unit. - unsigned MaxWavesPerEU = 0; - /// \brief Maximum flat work group size. - unsigned MaxFlatWorkGroupSize = 0; - /// \brief SGPR allocation granularity. - unsigned SGPRAllocGranule = 0; - /// \brief Total number of SGPRs. - unsigned TotalNumSGPRs = 0; - /// \brief Addressable number of SGPRs. - unsigned AddressableNumSGPRs = 0; - /// \brief VGPR allocation granularity. - unsigned VGPRAllocGranule = 0; - /// \brief Total number of VGPRs. - unsigned TotalNumVGPRs = 0; - /// \brief Addressable number of VGPRs. - unsigned AddressableNumVGPRs = 0; - - Metadata() = default; - }; - - } // end namespace IsaInfo - - namespace Program { - - // In-memory representation of program information. - struct Metadata { - std::vector MDVersionSeq; - IsaInfo::Metadata IsaInfo; - std::vector PrintfInfo; - std::vector Kernels; - - explicit Metadata() = default; - - // Construct from an YAML string. - explicit Metadata(const std::string &YAML); - - // Convert to YAML string. - std::string toYAML(); - - // Convert from YAML string. 
- static Metadata fromYAML(const std::string &S); - }; - - } //end namespace Program - -} // end namespace RuntimeMD -} // end namespace AMDGPU - -#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -806,7 +806,7 @@ bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectISA(); - bool ParseDirectiveRuntimeMetadata(); + bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); bool ParseSectionDirectiveHSAText(); @@ -2259,43 +2259,45 @@ return false; } -bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() { - std::string Metadata; - raw_string_ostream MS(Metadata); +bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() { + std::string YamlString; + raw_string_ostream YamlStream(YamlString); getLexer().setSkipSpace(false); bool FoundEnd = false; while (!getLexer().is(AsmToken::Eof)) { while (getLexer().is(AsmToken::Space)) { - MS << ' '; + YamlStream << getLexer().getTok().getString(); Lex(); } if (getLexer().is(AsmToken::Identifier)) { StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == ".end_amdgpu_runtime_metadata") { + if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) { Lex(); FoundEnd = true; break; } } - MS << Parser.parseStringToEndOfStatement() - << getContext().getAsmInfo()->getSeparatorString(); + YamlStream << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); Parser.eatToEndOfStatement(); } getLexer().setSkipSpace(true); - if (getLexer().is(AsmToken::Eof) && !FoundEnd) - return TokError("expected directive .end_amdgpu_runtime_metadata not found"); + if (getLexer().is(AsmToken::Eof) && 
!FoundEnd) { + return TokError( + "expected directive .end_amdgpu_code_object_metadata not found"); + } - MS.flush(); + YamlStream.flush(); - if (getTargetStreamer().EmitRuntimeMetadata(getFeatureBits(), Metadata)) - return Error(getParser().getTok().getLoc(), "invalid runtime metadata"); + if (!getTargetStreamer().EmitCodeObjectMetadata(getFeatureBits(), YamlString)) + return Error(getParser().getTok().getLoc(), "invalid code object metadata"); return false; } @@ -2407,8 +2409,8 @@ if (IDVal == ".hsa_code_object_isa") return ParseDirectiveHSACodeObjectISA(); - if (IDVal == ".amdgpu_runtime_metadata") - return ParseDirectiveRuntimeMetadata(); + if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin) + return ParseDirectiveCodeObjectMetadata(); if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h @@ -0,0 +1,347 @@ +//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H + +#include +#include +#include +#include + +namespace llvm { +namespace AMDGPU { + +//===----------------------------------------------------------------------===// +// Code Object Metadata. 
+//===----------------------------------------------------------------------===// +namespace CodeObject { + +/// \brief Code object metadata major version. +constexpr uint8_t MetadataVersionMajor = 1; +/// \brief Code object metadata minor version. +constexpr uint8_t MetadataVersionMinor = 0; + +/// \brief Code object metadata beginning assembler directive. +constexpr char MetadataAssemblerDirectiveBegin[] = + ".amdgpu_code_object_metadata"; +/// \brief Code object metadata ending assembler directive. +constexpr char MetadataAssemblerDirectiveEnd[] = + ".end_amdgpu_code_object_metadata"; + +/// \brief Access qualifiers. +enum class AccessQualifier : uint8_t { + Default = 0, + ReadOnly = 1, + WriteOnly = 2, + ReadWrite = 3, + Unknown = 0xff +}; + +/// \brief Address space qualifiers. +enum class AddressSpaceQualifier : uint8_t { + Private = 0, + Global = 1, + Constant = 2, + Local = 3, + Generic = 4, + Region = 5, + Unknown = 0xff +}; + +/// \brief Value kinds. +enum class ValueKind : uint8_t { + ByValue = 0, + GlobalBuffer = 1, + DynamicSharedPointer = 2, + Sampler = 3, + Image = 4, + Pipe = 5, + Queue = 6, + HiddenGlobalOffsetX = 7, + HiddenGlobalOffsetY = 8, + HiddenGlobalOffsetZ = 9, + HiddenNone = 10, + HiddenPrintfBuffer = 11, + HiddenDefaultQueue = 12, + HiddenCompletionAction = 13, + Unknown = 0xff +}; + +/// \brief Value types. +enum class ValueType : uint8_t { + Struct = 0, + I8 = 1, + U8 = 2, + I16 = 3, + U16 = 4, + F16 = 5, + I32 = 6, + U32 = 7, + F32 = 8, + I64 = 9, + U64 = 10, + F64 = 11, + Unknown = 0xff +}; + +//===----------------------------------------------------------------------===// +// Instruction Set Architecture Metadata (ISA). +//===----------------------------------------------------------------------===// +namespace Isa { + +namespace Key { +/// \brief Key for Isa::Metadata::mWavefrontSize. +constexpr char WavefrontSize[] = "WavefrontSize"; +/// \brief Key for Isa::Metadata::mLocalMemorySize. 
+constexpr char LocalMemorySize[] = "LocalMemorySize"; +/// \brief Key for Isa::Metadata::mEUsPerCU. +constexpr char EUsPerCU[] = "EUsPerCU"; +/// \brief Key for Isa::Metadata::mMaxWavesPerEU. +constexpr char MaxWavesPerEU[] = "MaxWavesPerEU"; +/// \brief Key for Isa::Metadata::mMaxFlatWorkGroupSize. +constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize"; +/// \brief Key for Isa::Metadata::mSGPRAllocGranule. +constexpr char SGPRAllocGranule[] = "SGPRAllocGranule"; +/// \brief Key for Isa::Metadata::mTotalNumSGPRs. +constexpr char TotalNumSGPRs[] = "TotalNumSGPRs"; +/// \brief Key for Isa::Metadata::mAddressableNumSGPRs. +constexpr char AddressableNumSGPRs[] = "AddressableNumSGPRs"; +/// \brief Key for Isa::Metadata::mVGPRAllocGranule. +constexpr char VGPRAllocGranule[] = "VGPRAllocGranule"; +/// \brief Key for Isa::Metadata::mTotalNumVGPRs. +constexpr char TotalNumVGPRs[] = "TotalNumVGPRs"; +/// \brief Key for Isa::Metadata::mAddressableNumVGPRs. +constexpr char AddressableNumVGPRs[] = "AddressableNumVGPRs"; +} // end namespace Key + +/// \brief In-memory representation of instruction set architecture metadata. +struct Metadata final { + /// \brief Wavefront size. Required. + uint32_t mWavefrontSize = 0; + /// \brief Local memory size in bytes. Required. + uint32_t mLocalMemorySize = 0; + /// \brief Number of execution units per compute unit. Required. + uint32_t mEUsPerCU = 0; + /// \brief Maximum number of waves per execution unit. Required. + uint32_t mMaxWavesPerEU = 0; + /// \brief Maximum flat work group size. Required. + uint32_t mMaxFlatWorkGroupSize = 0; + /// \brief SGPR allocation granularity. Required. + uint32_t mSGPRAllocGranule = 0; + /// \brief Total number of SGPRs. Required. + uint32_t mTotalNumSGPRs = 0; + /// \brief Addressable number of SGPRs. Required. + uint32_t mAddressableNumSGPRs = 0; + /// \brief VGPR allocation granularity. Required. + uint32_t mVGPRAllocGranule = 0; + /// \brief Total number of VGPRs. Required. 
+ uint32_t mTotalNumVGPRs = 0; + /// \brief Addressable number of VGPRs. Required. + uint32_t mAddressableNumVGPRs = 0; + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Isa + +//===----------------------------------------------------------------------===// +// Kernel Metadata. +//===----------------------------------------------------------------------===// +namespace Kernel { + +//===----------------------------------------------------------------------===// +// Kernel Attributes Metadata. +//===----------------------------------------------------------------------===// +namespace Attrs { + +namespace Key { +/// \brief Key for Kernel::Attrs::Metadata::mReqdWorkGroupSize. +constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize"; +/// \brief Key for Kernel::Attrs::Metadata::mWorkGroupSizeHint. +constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint"; +/// \brief Key for Kernel::Attrs::Metadata::mVecTypeHint. +constexpr char VecTypeHint[] = "VecTypeHint"; +} // end namespace Key + +/// \brief In-memory representation of kernel attributes metadata. +struct Metadata final { + /// \brief 'reqd_work_group_size' attribute. Optional. + std::vector mReqdWorkGroupSize = std::vector(); + /// \brief 'work_group_size_hint' attribute. Optional. + std::vector mWorkGroupSizeHint = std::vector(); + /// \brief 'vec_type_hint' attribute. Optional. + std::string mVecTypeHint = std::string(); + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel attributes metadata is empty, false otherwise. + bool empty() const { + return mReqdWorkGroupSize.empty() && + mWorkGroupSizeHint.empty() && + mVecTypeHint.empty(); + } + + /// \returns True if kernel attributes metadata is not empty, false otherwise. + bool notEmpty() const { + return !empty(); + } +}; + +} // end namespace Attrs + +//===----------------------------------------------------------------------===// +// Kernel Argument Metadata. 
+//===----------------------------------------------------------------------===// +namespace Arg { + +namespace Key { +/// \brief Key for Kernel::Arg::Metadata::mSize. +constexpr char Size[] = "Size"; +/// \brief Key for Kernel::Arg::Metadata::mAlign. +constexpr char Align[] = "Align"; +/// \brief Key for Kernel::Arg::Metadata::mValueKind. +constexpr char Kind[] = "Kind"; +/// \brief Key for Kernel::Arg::Metadata::mValueType. +constexpr char ValueType[] = "ValueType"; +/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign. +constexpr char PointeeAlign[] = "PointeeAlign"; +/// \brief Key for Kernel::Arg::Metadata::mAccQual. +constexpr char AccQual[] = "AccQual"; +/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual. +constexpr char AddrSpaceQual[] = "AddrSpaceQual"; +/// \brief Key for Kernel::Arg::Metadata::mIsConst. +constexpr char IsConst[] = "IsConst"; +/// \brief Key for Kernel::Arg::Metadata::mIsPipe. +constexpr char IsPipe[] = "IsPipe"; +/// \brief Key for Kernel::Arg::Metadata::mIsRestrict. +constexpr char IsRestrict[] = "IsRestrict"; +/// \brief Key for Kernel::Arg::Metadata::mIsVolatile. +constexpr char IsVolatile[] = "IsVolatile"; +/// \brief Key for Kernel::Arg::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Arg::Metadata::mTypeName. +constexpr char TypeName[] = "TypeName"; +} // end namespace Key + +/// \brief In-memory representation of kernel argument metadata. +struct Metadata final { + /// \brief Size in bytes. Required. + uint32_t mSize = 0; + /// \brief Alignment in bytes. Required. + uint32_t mAlign = 0; + /// \brief Value kind. Required. + ValueKind mValueKind = ValueKind::Unknown; + /// \brief Value type. Required. + ValueType mValueType = ValueType::Unknown; + /// \brief Pointee alignment in bytes. Optional. + uint32_t mPointeeAlign = 0; + /// \brief Access qualifier. Optional. + AccessQualifier mAccQual = AccessQualifier::Unknown; + /// \brief Address space qualifier. Optional. 
+ AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown; + /// \brief True if 'const' qualifier is specified. Optional. + bool mIsConst = false; + /// \brief True if 'pipe' qualifier is specified. Optional. + bool mIsPipe = false; + /// \brief True if 'restrict' qualifier is specified. Optional. + bool mIsRestrict = false; + /// \brief True if 'volatile' qualifier is specified. Optional. + bool mIsVolatile = false; + /// \brief Name. Optional. + std::string mName = std::string(); + /// \brief Type name. Optional. + std::string mTypeName = std::string(); + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Arg + +namespace Key { +/// \brief Key for Kernel::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Metadata::mLanguage. +constexpr char Language[] = "Language"; +/// \brief Key for Kernel::Metadata::mLanguageVersion. +constexpr char LanguageVersion[] = "LanguageVersion"; +/// \brief Key for Kernel::Metadata::mAttrs. +constexpr char Attrs[] = "Attrs"; +/// \brief Key for Kernel::Metadata::mArgs. +constexpr char Args[] = "Args"; +} // end namespace Key + +/// \brief In-memory representation of kernel metadata. +struct Metadata final { + /// \brief Name. Required. + std::string mName = std::string(); + /// \brief Language. Optional. + std::string mLanguage = std::string(); + /// \brief Language version. Optional. + std::vector mLanguageVersion = std::vector(); + /// \brief Attributes metadata. Optional. + Attrs::Metadata mAttrs = Attrs::Metadata(); + /// \brief Arguments metadata. Optional. + std::vector mArgs = std::vector(); + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Kernel + +namespace Key { +/// \brief Key for CodeObject::Metadata::mVersion. +constexpr char Version[] = "Version"; +/// \brief Key for CodeObject::Metadata::mIsa. +constexpr char Isa[] = "Isa"; +/// \brief Key for CodeObject::Metadata::mPrintf. 
+constexpr char Printf[] = "Printf"; +/// \brief Key for CodeObject::Metadata::mKernels. +constexpr char Kernels[] = "Kernels"; +} // end namespace Key + +/// \brief In-memory representation of code object metadata. +struct Metadata final { + /// \brief Code object metadata version. Required. + std::vector mVersion = std::vector(); + /// \brief Instruction set architecture metadata. Optional. + Isa::Metadata mIsa = Isa::Metadata(); + /// \brief Printf metadata. Optional. + std::vector mPrintf = std::vector(); + /// \brief Kernels metadata. Optional. + std::vector mKernels = std::vector(); + + /// \brief Default constructor. + Metadata() = default; + + /// \brief Converts \p YamlString to \p CodeObjectMetadata. + static std::error_code fromYamlString(std::string YamlString, + Metadata &CodeObjectMetadata); + + /// \brief Converts \p CodeObjectMetadata to \p YamlString. + static std::error_code toYamlString(Metadata CodeObjectMetadata, + std::string &YamlString); +}; + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -0,0 +1,95 @@ +//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata Streamer. 
+/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H + +#include "AMDGPUCodeObjectMetadata.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorOr.h" + +namespace llvm { + +class Argument; +class DataLayout; +class FeatureBitset; +class Function; +class MDNode; +class Module; +class Type; + +namespace AMDGPU { +namespace CodeObject { + +class MetadataStreamer final { +private: + Metadata CodeObjectMetadata; + + void dump(StringRef YamlString) const; + + void verify(StringRef YamlString) const; + + AccessQualifier getAccessQualifier(StringRef AccQual) const; + + AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const; + + ValueKind getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const; + + ValueType getValueType(Type *Ty, StringRef TypeName) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + std::vector getWorkGroupDimensions(MDNode *Node) const; + + void emitVersion(); + + void emitIsa(const FeatureBitset &Features); + + void emitPrintf(const Module &Mod); + + void emitKernelLanguage(const Function &Func); + + void emitKernelAttrs(const Function &Func); + + void emitKernelArgs(const Function &Func); + + void emitKernelArg(const Argument &Arg); + + void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, + StringRef TypeQual = "", StringRef BaseTypeName = "", + StringRef AccQual = "", StringRef Name = "", + StringRef TypeName = ""); +public: + MetadataStreamer() = default; + ~MetadataStreamer() = default; + + void begin(const FeatureBitset &Features, const Module &Mod); + + void end() {} + + void emitKernel(const Function &Func); + + ErrorOr toYamlString(); + + ErrorOr toYamlString(const FeatureBitset &Features, + StringRef YamlString); +}; + +} // end namespace CodeObject +} // end 
namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -0,0 +1,578 @@ +//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata Streamer. +/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUCodeObjectMetadataStreamer.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::CodeObject; +using namespace llvm::AMDGPU::IsaInfo; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { + +static cl::opt DumpCodeObjectMetadata( + "amdgpu-dump-comd", + cl::desc("Dump AMDGPU Code Object Metadata")); +static cl::opt VerifyCodeObjectMetadata( + "amdgpu-verify-comd", + cl::desc("Verify AMDGPU Code Object Metadata")); + +namespace yaml { + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", 
AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + 
YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Isa::Metadata &MD) { + YIO.mapRequired(Isa::Key::WavefrontSize, MD.mWavefrontSize); + YIO.mapRequired(Isa::Key::LocalMemorySize, MD.mLocalMemorySize); + YIO.mapRequired(Isa::Key::EUsPerCU, MD.mEUsPerCU); + YIO.mapRequired(Isa::Key::MaxWavesPerEU, MD.mMaxWavesPerEU); + YIO.mapRequired(Isa::Key::MaxFlatWorkGroupSize, MD.mMaxFlatWorkGroupSize); + YIO.mapRequired(Isa::Key::SGPRAllocGranule, MD.mSGPRAllocGranule); + YIO.mapRequired(Isa::Key::TotalNumSGPRs, MD.mTotalNumSGPRs); + YIO.mapRequired(Isa::Key::AddressableNumSGPRs, MD.mAddressableNumSGPRs); + YIO.mapRequired(Isa::Key::VGPRAllocGranule, MD.mVGPRAllocGranule); + YIO.mapRequired(Isa::Key::TotalNumVGPRs, MD.mTotalNumVGPRs); + YIO.mapRequired(Isa::Key::AddressableNumVGPRs, MD.mAddressableNumVGPRs); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + MD.mVecTypeHint, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::Kind, MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, 
MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, CodeObject::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Isa, MD.mIsa); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { + +/* static */ +std::error_code CodeObject::Metadata::fromYamlString( + std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { + yaml::Input YamlInput(YamlString); + YamlInput >> CodeObjectMetadata; + return YamlInput.error(); +} + +/* static */ +std::error_code CodeObject::Metadata::toYamlString( + CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { + raw_string_ostream YamlStream(YamlString); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); + YamlOutput << CodeObjectMetadata; + return std::error_code(); +} + +namespace CodeObject { + +void 
MetadataStreamer::dump(StringRef YamlString) const { + errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n'; +} + +void MetadataStreamer::verify(StringRef YamlString) const { + errs() << "AMDGPU Code Object Metadata Parser Test: "; + + CodeObject::Metadata FromYamlString; + if (Metadata::fromYamlString(YamlString, FromYamlString)) { + errs() << "FAIL\n"; + return; + } + + std::string ToYamlString; + if (Metadata::toYamlString(FromYamlString, ToYamlString)) { + errs() << "FAIL\n"; + return; + } + + errs() << (YamlString == ToYamlString ? "PASS" : "FAIL") << '\n'; + if (YamlString != ToYamlString) { + errs() << "Original input: " << YamlString << '\n' + << "Produced output: " << ToYamlString << '\n'; + } +} + +AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { + if (AccQual.empty()) + return AccessQualifier::Unknown; + + return StringSwitch(AccQual) + .Case("read_only", AccessQualifier::ReadOnly) + .Case("write_only", AccessQualifier::WriteOnly) + .Case("read_write", AccessQualifier::ReadWrite) + .Default(AccessQualifier::Default); +} + +AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( + unsigned AddressSpace) const { + switch (AddressSpace) { + case AMDGPUAS::PRIVATE_ADDRESS: + return AddressSpaceQualifier::Private; + case AMDGPUAS::GLOBAL_ADDRESS: + return AddressSpaceQualifier::Global; + case AMDGPUAS::CONSTANT_ADDRESS: + return AddressSpaceQualifier::Constant; + case AMDGPUAS::LOCAL_ADDRESS: + return AddressSpaceQualifier::Local; + case AMDGPUAS::FLAT_ADDRESS: + return AddressSpaceQualifier::Generic; + case AMDGPUAS::REGION_ADDRESS: + return AddressSpaceQualifier::Region; + } + + llvm_unreachable("Unknown address space qualifier"); +} + +ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return ValueKind::Pipe; + + return StringSwitch(BaseTypeName) + .Case("sampler_t", ValueKind::Sampler) + 
.Case("queue_t", ValueKind::Queue) + .Cases("image1d_t", + "image1d_array_t", + "image1d_buffer_t", + "image2d_t", + "image2d_array_t", + "image2d_array_depth_t", ValueKind::Image) + .Cases("image2d_array_msaa_t", + "image2d_array_msaa_depth_t", + "image2d_depth_t", + "image2d_msaa_t", + "image2d_msaa_depth_t", + "image3d_t", ValueKind::Image) + .Default(isa(Ty) ? + (Ty->getPointerAddressSpace() == + AMDGPUAS::LOCAL_ADDRESS ? + ValueKind::DynamicSharedPointer : + ValueKind::GlobalBuffer) : + ValueKind::ByValue); +} + +ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? ValueType::I8 : ValueType::U8; + case 16: + return Signed ? ValueType::I16 : ValueType::U16; + case 32: + return Signed ? ValueType::I32 : ValueType::U32; + case 64: + return Signed ? ValueType::I64 : ValueType::U64; + default: + return ValueType::Struct; + } + } + case Type::HalfTyID: + return ValueType::F16; + case Type::FloatTyID: + return ValueType::F32; + case Type::DoubleTyID: + return ValueType::F64; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return ValueType::Struct; + } +} + +std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast(Ty); +
auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::vector MetadataStreamer::getWorkGroupDimensions( + MDNode *Node) const { + std::vector Dims; + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims.push_back(mdconst::extract(Op)->getZExtValue()); + return Dims; +} + +void MetadataStreamer::emitVersion() { + auto &Version = CodeObjectMetadata.mVersion; + + Version.push_back(MetadataVersionMajor); + Version.push_back(MetadataVersionMinor); +} + +void MetadataStreamer::emitIsa(const FeatureBitset &Features) { + auto &Isa = CodeObjectMetadata.mIsa; + + Isa.mWavefrontSize = getWavefrontSize(Features); + Isa.mLocalMemorySize = getLocalMemorySize(Features); + Isa.mEUsPerCU = getEUsPerCU(Features); + Isa.mMaxWavesPerEU = getMaxWavesPerEU(Features); + Isa.mMaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features); + Isa.mSGPRAllocGranule = getSGPRAllocGranule(Features); + Isa.mTotalNumSGPRs = getTotalNumSGPRs(Features); + Isa.mAddressableNumSGPRs = getAddressableNumSGPRs(Features); + Isa.mVGPRAllocGranule = getVGPRAllocGranule(Features); + Isa.mTotalNumVGPRs = getTotalNumVGPRs(Features); + Isa.mAddressableNumVGPRs = getAddressableNumVGPRs(Features); +} + +void MetadataStreamer::emitPrintf(const Module &Mod) { + auto &Printf = CodeObjectMetadata.mPrintf; + + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf.push_back(cast(Op->getOperand(0))->getString()); +} + +void MetadataStreamer::emitKernelLanguage(const Function &Func) { + auto &Kernel = CodeObjectMetadata.mKernels.back(); + + // TODO: What about other languages? 
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kernel.mLanguage = "OpenCL C"; + Kernel.mLanguageVersion.push_back( + mdconst::extract(Op0->getOperand(0))->getZExtValue()); + Kernel.mLanguageVersion.push_back( + mdconst::extract(Op0->getOperand(1))->getZExtValue()); +} + +void MetadataStreamer::emitKernelAttrs(const Function &Func) { + auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs; + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Attrs.mVecTypeHint = getTypeName( + cast(Node->getOperand(0))->getType(), + mdconst::extract(Node->getOperand(1))->getZExtValue()); + } +} + +void MetadataStreamer::emitKernelArgs(const Function &Func) { + for (auto &Arg : Func.args()) + emitKernelArg(Arg); + + // TODO: What about other languages? 
+ if (!Func.getParent()->getNamedMetadata("opencl.ocl.version")) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); + + if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + return; + + auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), + AMDGPUAS::GLOBAL_ADDRESS); + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); +} + +void MetadataStreamer::emitKernelArg(const Argument &Arg) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast(Node->getOperand(ArgNo))->getString(); + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast(Node->getOperand(ArgNo))->getString(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast(Node->getOperand(ArgNo))->getString(); + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual, + BaseTypeName, AccQual, Name, TypeName); +} + +void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, + ValueKind ValueKind, StringRef TypeQual, + StringRef BaseTypeName, StringRef AccQual, + StringRef Name, StringRef 
TypeName) { + CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); + auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back(); + + Arg.mSize = DL.getTypeAllocSize(Ty); + Arg.mAlign = DL.getABITypeAlignment(Ty); + Arg.mValueKind = ValueKind; + Arg.mValueType = getValueType(Ty, BaseTypeName); + + if (auto PtrTy = dyn_cast(Ty)) { + auto ElTy = PtrTy->getElementType(); + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized()) + Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); + } + + Arg.mAccQual = getAccessQualifier(AccQual); + + if (auto PtrTy = dyn_cast(Ty)) + Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); + + SmallVector SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + auto P = StringSwitch(Key) + .Case("const", &Arg.mIsConst) + .Case("pipe", &Arg.mIsPipe) + .Case("restrict", &Arg.mIsRestrict) + .Case("volatile", &Arg.mIsVolatile) + .Default(nullptr); + if (P) + *P = true; + } + + Arg.mName = Name; + Arg.mTypeName = TypeName; +} + +void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) { + emitVersion(); + emitIsa(Features); + emitPrintf(Mod); +} + +void MetadataStreamer::emitKernel(const Function &Func) { + if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return; + + CodeObjectMetadata.mKernels.push_back(Kernel::Metadata()); + auto &Kernel = CodeObjectMetadata.mKernels.back(); + + Kernel.mName = Func.getName(); + emitKernelLanguage(Func); + emitKernelAttrs(Func); + emitKernelArgs(Func); +} + +ErrorOr MetadataStreamer::toYamlString() { + std::string YamlString; + if (auto Error = Metadata::toYamlString(CodeObjectMetadata, YamlString)) + return Error; + + if (DumpCodeObjectMetadata) + dump(YamlString); + if (VerifyCodeObjectMetadata) + verify(YamlString); + + return YamlString; +} + +ErrorOr MetadataStreamer::toYamlString( + const FeatureBitset &Features, StringRef YamlString) { + if (auto Error = 
Metadata::fromYamlString(YamlString, CodeObjectMetadata)) + return Error; + + emitIsa(Features); + return toYamlString(); +} + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares functions for generating runtime metadata. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H - -#include "llvm/Support/ErrorOr.h" -#include - -namespace llvm { -class FeatureBitset; -class Module; - -/// \returns Runtime metadata as YAML string. -std::string getRuntimeMDYAMLString(const FeatureBitset &Features, - const Module &M); - -/// \returns \p YAML if \p YAML is valid runtime metadata, error otherwise. -ErrorOr getRuntimeMDYAMLString(const FeatureBitset &Features, - StringRef YAML); - -} -#endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ /dev/null @@ -1,470 +0,0 @@ -//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// Generates AMDGPU runtime metadata for YAML mapping. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPURuntimeMetadata.h" -#include "MCTargetDesc/AMDGPURuntimeMD.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/YAMLTraits.h" -#include -#include -#include -#include - -using namespace llvm; -using namespace llvm::AMDGPU::IsaInfo; -using namespace ::AMDGPU::RuntimeMD; - -static cl::opt -DumpRuntimeMD("amdgpu-dump-rtmd", - cl::desc("Dump AMDGPU runtime metadata")); - -static cl::opt -CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden, - cl::desc("Check AMDGPU runtime metadata YAML parser")); - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata) - -namespace llvm { -namespace yaml { - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, KernelArg::Metadata &A) { - YamlIO.mapRequired(KeyName::ArgSize, A.Size); - YamlIO.mapRequired(KeyName::ArgAlign, A.Align); - YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U); - YamlIO.mapRequired(KeyName::ArgKind, A.Kind); - YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType); - YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string()); - 
YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string()); - YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL); - YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL); - YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0)); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, Kernel::Metadata &K) { - YamlIO.mapRequired(KeyName::KernelName, K.Name); - YamlIO.mapOptional(KeyName::Language, K.Language, std::string()); - YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion); - YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize); - YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint); - YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string()); - YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex, - INVALID_KERNEL_INDEX); - YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, - uint8_t(0)); - YamlIO.mapOptional(KeyName::Args, K.Args); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, IsaInfo::Metadata &I) { - YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize); - YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize); - YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU); - YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU); - YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize, - I.MaxFlatWorkGroupSize); - YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule); - YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs); - YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs, - I.AddressableNumSGPRs); - 
YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule); - YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs); - YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs, - I.AddressableNumVGPRs); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, Program::Metadata &Prog) { - YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); - YamlIO.mapOptional(KeyName::IsaInfo, Prog.IsaInfo); - YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); - YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); - } - static const bool flow = true; -}; - -} // end namespace yaml -} // end namespace llvm - -// Get a vector of three integer values from MDNode \p Node; -static std::vector getThreeInt32(MDNode *Node) { - assert(Node->getNumOperands() == 3); - std::vector V; - for (const MDOperand &Op : Node->operands()) { - const ConstantInt *CI = mdconst::extract(Op); - V.push_back(CI->getZExtValue()); - } - return V; -} - -static std::string getOCLTypeName(Type *Ty, bool Signed) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getOCLTypeName(Ty, true)).str(); - unsigned BW = Ty->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - case Type::VectorTyID: { - VectorType *VecTy = cast(Ty); - Type *EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); - } - default: - return "unknown"; - } -} - -static KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return KernelArg::F16; - case 
Type::FloatTyID: - return KernelArg::F32; - case Type::DoubleTyID: - return KernelArg::F64; - case Type::IntegerTyID: { - bool Signed = !TypeName.startswith("u"); - switch (Ty->getIntegerBitWidth()) { - case 8: - return Signed ? KernelArg::I8 : KernelArg::U8; - case 16: - return Signed ? KernelArg::I16 : KernelArg::U16; - case 32: - return Signed ? KernelArg::I32 : KernelArg::U32; - case 64: - return Signed ? KernelArg::I64 : KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as struct type. - return KernelArg::Struct; - } - } - case Type::VectorTyID: - return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); - case Type::PointerTyID: - return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); - default: - return KernelArg::Struct; - } -} - -static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: - return KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: - return KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: - return KernelArg::Region; - default: - return KernelArg::Private; - } -} - -static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL, - Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", - StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "", - StringRef AccQual = "") { - KernelArg::Metadata Arg; - - // Set ArgSize and ArgAlign. - Arg.Size = DL.getTypeAllocSize(T); - Arg.Align = DL.getABITypeAlignment(T); - if (auto PT = dyn_cast(T)) { - auto ET = PT->getElementType(); - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - Arg.PointeeAlign = DL.getABITypeAlignment(ET); - } - - // Set ArgTypeName. - Arg.TypeName = TypeName; - - // Set ArgName. 
- Arg.Name = ArgName; - - // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe. - SmallVector SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto *P = StringSwitch(KeyName) - .Case("volatile", &Arg.IsVolatile) - .Case("restrict", &Arg.IsRestrict) - .Case("const", &Arg.IsConst) - .Case("pipe", &Arg.IsPipe) - .Default(nullptr); - if (P) - *P = 1; - } - - // Set ArgKind. - Arg.Kind = Kind; - - // Set ArgValueType. - Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName); - - // Set ArgAccQual. - if (!AccQual.empty()) { - Arg.AccQual = StringSwitch(AccQual) - .Case("read_only", KernelArg::ReadOnly) - .Case("write_only", KernelArg::WriteOnly) - .Case("read_write", KernelArg::ReadWrite) - .Default(KernelArg::AccNone); - } - - // Set ArgAddrQual. - if (auto *PT = dyn_cast(T)) { - Arg.AddrQual = getRuntimeAddrSpace(static_cast( - PT->getAddressSpace())); - } - - return Arg; -} - -static Kernel::Metadata getRuntimeMDForKernel(const Function &F) { - Kernel::Metadata Kernel; - Kernel.Name = F.getName(); - auto &M = *F.getParent(); - - // Set Language and LanguageVersion. 
- if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands() != 0) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - Kernel.Language = "OpenCL C"; - uint16_t Major = mdconst::extract(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = mdconst::extract(Node->getOperand(1)) - ->getZExtValue(); - Kernel.LanguageVersion.push_back(Major); - Kernel.LanguageVersion.push_back(Minor); - } - } - } - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &Arg : F.args()) { - unsigned I = Arg.getArgNo(); - Type *T = Arg.getType(); - auto TypeName = dyn_cast(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - auto BaseTypeName = cast(F.getMetadata( - "kernel_arg_base_type")->getOperand(I))->getString(); - StringRef ArgName; - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) - ArgName = cast(ArgNameMD->getOperand(I))->getString(); - auto TypeQual = cast(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - auto AccQual = cast(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - KernelArg::Kind Kind; - if (TypeQual.find("pipe") != StringRef::npos) - Kind = KernelArg::Pipe; - else Kind = StringSwitch(BaseTypeName) - .Case("sampler_t", KernelArg::Sampler) - .Case("queue_t", KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , "image2d_array_t", KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - KernelArg::Image) - .Default(isa(T) ? - (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? 
- KernelArg::DynamicSharedPointer : - KernelArg::GlobalBuffer) : - KernelArg::ByValue); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind, - BaseTypeName, TypeName, ArgName, TypeQual, AccQual)); - } - - // Emit hidden kernel arguments for OpenCL kernels. - if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto Int64T = Type::getInt64Ty(F.getContext()); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetX)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetY)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetZ)); - if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { - auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), - KernelArg::Global); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT, - KernelArg::HiddenPrintfBuffer)); - } - } - - // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint. - if (auto RWGS = F.getMetadata("reqd_work_group_size")) - Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS); - - if (auto WGSH = F.getMetadata("work_group_size_hint")) - Kernel.WorkGroupSizeHint = getThreeInt32(WGSH); - - if (auto VTH = F.getMetadata("vec_type_hint")) - Kernel.VecTypeHint = getOCLTypeName(cast( - VTH->getOperand(0))->getType(), mdconst::extract( - VTH->getOperand(1))->getZExtValue()); - - return Kernel; -} - -static void getIsaInfo(const FeatureBitset &Features, IsaInfo::Metadata &IIM) { - IIM.WavefrontSize = getWavefrontSize(Features); - IIM.LocalMemorySize = getLocalMemorySize(Features); - IIM.EUsPerCU = getEUsPerCU(Features); - IIM.MaxWavesPerEU = getMaxWavesPerEU(Features); - IIM.MaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features); - IIM.SGPRAllocGranule = getSGPRAllocGranule(Features); - IIM.TotalNumSGPRs = getTotalNumSGPRs(Features); - IIM.AddressableNumSGPRs = getAddressableNumSGPRs(Features); - IIM.VGPRAllocGranule = getVGPRAllocGranule(Features); - 
IIM.TotalNumVGPRs = getTotalNumVGPRs(Features); - IIM.AddressableNumVGPRs = getAddressableNumVGPRs(Features); -} - -Program::Metadata::Metadata(const std::string &YAML) { - yaml::Input Input(YAML); - Input >> *this; -} - -std::string Program::Metadata::toYAML() { - std::string Text; - raw_string_ostream Stream(Text); - yaml::Output Output(Stream, nullptr, - std::numeric_limits::max() /* do not wrap line */); - Output << *this; - return Stream.str(); -} - -Program::Metadata Program::Metadata::fromYAML(const std::string &S) { - return Program::Metadata(S); -} - -// Check if the YAML string can be parsed. -static void checkRuntimeMDYAMLString(const std::string &YAML) { - auto P = Program::Metadata::fromYAML(YAML); - auto S = P.toYAML(); - errs() << "AMDGPU runtime metadata parser test " - << (YAML == S ? "passes" : "fails") << ".\n"; - if (YAML != S) { - errs() << "First output: " << YAML << '\n' - << "Second output: " << S << '\n'; - } -} - -std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, - const Module &M) { - Program::Metadata Prog; - Prog.MDVersionSeq.push_back(MDVersion); - Prog.MDVersionSeq.push_back(MDRevision); - - getIsaInfo(Features, Prog.IsaInfo); - - // Set PrintfInfo. - if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { - for (unsigned I = 0; I < MD->getNumOperands(); ++I) { - auto Node = MD->getOperand(I); - if (Node->getNumOperands() > 0) - Prog.PrintfInfo.push_back(cast(Node->getOperand(0)) - ->getString()); - } - } - - // Set Kernels. 
- for (auto &F: M.functions()) { - if (!F.getMetadata("kernel_arg_type")) - continue; - Prog.Kernels.emplace_back(getRuntimeMDForKernel(F)); - } - - auto YAML = Prog.toYAML(); - - if (DumpRuntimeMD) - errs() << "AMDGPU runtime metadata:\n" << YAML << '\n'; - - if (CheckRuntimeMDParser) - checkRuntimeMDYAMLString(YAML); - - return YAML; -} - -ErrorOr llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, - StringRef YAML) { - Program::Metadata Prog; - yaml::Input Input(YAML); - Input >> Prog; - - getIsaInfo(Features, Prog.IsaInfo); - - if (Input.error()) - return Input.error(); - return Prog.toYAML(); -} Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H +#include "AMDGPUCodeObjectMetadataStreamer.h" #include "AMDKernelCodeT.h" #include "llvm/MC/MCStreamer.h" @@ -27,6 +28,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: + AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer; MCContext &getContext() const { return Streamer.getContext(); } public: @@ -47,15 +49,19 @@ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) = 0; + virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features, + const Module &Mod); - /// \returns False on success, true on failure. - virtual bool EmitRuntimeMetadata(const FeatureBitset &Features, - StringRef Metadata) = 0; + virtual void EmitKernelCodeObjectMetadata(const Function &Func); + + virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features); + + /// \returns True on success, false on failure. 
+ virtual bool EmitCodeObjectMetadata(const FeatureBitset &Features, + StringRef YamlString) = 0; }; -class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { formatted_raw_ostream &OS; public: AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); @@ -74,15 +80,12 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) override; - - /// \returns False on success, true on failure. - bool EmitRuntimeMetadata(const FeatureBitset &Features, - StringRef Metadata) override; + /// \returns True on success, false on failure. + bool EmitCodeObjectMetadata(const FeatureBitset &Features, + StringRef YamlString) override; }; -class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; void EmitAMDGPUNote(const MCExpr *DescSize, @@ -109,12 +112,9 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) override; - - /// \returns False on success, true on failure. - bool EmitRuntimeMetadata(const FeatureBitset &Features, - StringRef Metadata) override; + /// \returns True on success, false on failure. 
+ bool EmitCodeObjectMetadata(const FeatureBitset &Features, + StringRef YamlString) override; }; } Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,7 +27,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" -#include "AMDGPURuntimeMD.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -36,9 +35,29 @@ using namespace llvm; using namespace llvm::AMDGPU; +//===----------------------------------------------------------------------===// +// AMDGPUTargetStreamer +//===----------------------------------------------------------------------===// + AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata( + const FeatureBitset &Features, const Module &Mod) { + CodeObjectMetadataStreamer.begin(Features, Mod); +} + +void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) { + CodeObjectMetadataStreamer.emitKernel(Func); +} + +void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata( + const FeatureBitset &Features) { + CodeObjectMetadataStreamer.end(); + EmitCodeObjectMetadata(Features, + CodeObjectMetadataStreamer.toYamlString().get()); +} + //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer //===----------------------------------------------------------------------===// @@ -93,24 +112,18 @@ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) { - OS << "\t.amdgpu_runtime_metadata\n"; - OS << getRuntimeMDYAMLString(Features, M); - OS << "\n\t.end_amdgpu_runtime_metadata\n"; -} - -bool AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset 
&Features, - StringRef Metadata) { - auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata); - if (!VerifiedMetadata) - return true; +bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata( + const FeatureBitset &Features, StringRef YamlString) { + auto VerifiedYamlString = + CodeObjectMetadataStreamer.toYamlString(Features, YamlString); + if (!VerifiedYamlString) + return false; - OS << "\t.amdgpu_runtime_metadata"; - OS << VerifiedMetadata.get(); - OS << "\t.end_amdgpu_runtime_metadata\n"; + OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n'; + OS << VerifiedYamlString.get(); + OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n'; - return false; + return true; } //===----------------------------------------------------------------------===// @@ -223,11 +236,12 @@ Symbol->setBinding(ELF::STB_GLOBAL); } -bool AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, - StringRef Metadata) { - auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata); - if (!VerifiedMetadata) - return true; +bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata( + const FeatureBitset &Features, StringRef YamlString) { + auto VerifiedYamlString = + CodeObjectMetadataStreamer.toYamlString(Features, YamlString); + if (!VerifiedYamlString) + return false; // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. 
@@ -240,18 +254,13 @@ EmitAMDGPUNote( DescSZ, - ElfNote::NT_AMDGPU_HSA_RUNTIME_METADATA, + ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA, [&](MCELFStreamer &OS) { OS.EmitLabel(DescBegin); - OS.EmitBytes(VerifiedMetadata.get()); + OS.EmitBytes(VerifiedYamlString.get()); OS.EmitLabel(DescEnd); } ); - return false; -} - -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) { - EmitRuntimeMetadata(Features, getRuntimeMDYAMLString(Features, M)); + return true; } Index: lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -1,13 +1,12 @@ - add_llvm_library(LLVMAMDGPUDesc AMDGPUAsmBackend.cpp + AMDGPUCodeObjectMetadataStreamer.cpp AMDGPUELFObjectWriter.cpp AMDGPUELFStreamer.cpp + AMDGPUMCAsmInfo.cpp AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp - AMDGPUMCAsmInfo.cpp - AMDGPURuntimeMD.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp - ) +) Index: test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll @@ -0,0 +1,1281 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; 
RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +%struct.A = type { i8, float } +%opencl.image1d_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque +%opencl.queue_t = type opaque +%opencl.pipe_t = type opaque +%struct.B = type { i32 addrspace(1)*} +%opencl.clk_event_t = type opaque + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] + +; CHECK: Isa: +; CHECK: WavefrontSize: 64 +; CHECK: LocalMemorySize: 65536 +; CHECK: EUsPerCU: 4 +; CHECK: MaxWavesPerEU: 10 +; CHECK: MaxFlatWorkGroupSize: 2048 +; GFX700: SGPRAllocGranule: 8 +; GFX800: SGPRAllocGranule: 16 +; GFX900: SGPRAllocGranule: 16 +; GFX700: TotalNumSGPRs: 512 +; GFX800: TotalNumSGPRs: 800 +; GFX900: TotalNumSGPRs: 800 +; GFX700: AddressableNumSGPRs: 104 +; GFX800: AddressableNumSGPRs: 96 +; GFX900: AddressableNumSGPRs: 102 +; CHECK: VGPRAllocGranule: 4 +; CHECK: TotalNumVGPRs: 256 +; CHECK: AddressableNumVGPRs: 256 + +; CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + +; CHECK: Kernels: + +; CHECK: - Name: test_char +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 1 +; CHECK-NEXT: Align: 1 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: char +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: 
Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_char(i8 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 + !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_ushort2 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: U16 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: ushort2 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_ushort2(<2 x i16> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 + !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_int3 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: Align: 16 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int3 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: 
Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_int3(<3 x i32> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 + !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_ulong4 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 32 +; CHECK-NEXT: Align: 32 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: U64 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: ulong4 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_ulong4(<4 x i64> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 + !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_half8 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: Align: 16 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: F16 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: half8 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; 
CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_half8(<8 x half> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 + !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_float16 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 64 +; CHECK-NEXT: Align: 64 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: F32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: float16 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_float16(<16 x float> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 + !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_double16 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 128 +; CHECK-NEXT: Align: 128 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: F64 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: double16 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: 
HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_double16(<16 x double> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 + !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_pointer +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 + !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_image +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Image +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: image2d_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; 
CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 + !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_sampler +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: Sampler +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: sampler_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_sampler(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 + !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_queue +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Queue +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: 
Global +; CHECK-NEXT: TypeName: queue_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 + !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_struct +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Private +; CHECK-NEXT: TypeName: struct A +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_struct(%struct.A* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 + !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_i128 +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 16 +; 
CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: i128 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_i128(i128 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 + !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_multi_arg +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I16 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: short2 +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: char3 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: 
AddrSpaceQual: Global +define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 + !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: - Name: test_addr_space +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Constant +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: PointeeAlign: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, + i32 addrspace(2)* %c, + i32 addrspace(3)* %l) + !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: - Name: test_type_qual +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 
+; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: IsVolatile: true +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: IsConst: true +; CHECK-NEXT: IsRestrict: true +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Pipe +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: IsPipe: true +; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, + i32 addrspace(1)* %b, + %opencl.pipe_t addrspace(1)* %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { + ret void +} + +; CHECK: - Name: test_access_qual +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Image +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: ReadOnly +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: image1d_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Image +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: 
AccQual: WriteOnly +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: image2d_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: Image +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: ReadWrite +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: image3d_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, + %opencl.image2d_t addrspace(1)* %wo, + %opencl.image3d_t addrspace(1)* %rw) + !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 + !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_half +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: half +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: 
AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_float +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: float +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_double +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: double +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: 
Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_char +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: char +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_short +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: short +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: 
HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_long +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: long +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { + ret void +} + +; CHECK: - Name: test_vec_type_hint_unknown +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: VecTypeHint: unknown +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; 
CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { + ret void +} + +; CHECK: - Name: test_reqd_wgs_vec_type_hint +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: ReqdWorkGroupSize: [ 1, 2, 4 ] +; CHECK-NEXT: VecTypeHint: int +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 
!vec_type_hint !5 + !reqd_work_group_size !6 { + ret void +} + +; CHECK: - Name: test_wgs_hint_vec_type_hint +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: WorkGroupSizeHint: [ 8, 16, 32 ] +; CHECK-NEXT: VecTypeHint: uint4 +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: int +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 + !work_group_size_hint !8 { + ret void +} + +; CHECK: - Name: test_arg_ptr_to_ptr +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: 'int **' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: 
Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 + !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_arg_struct_contains_ptr +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Private +; CHECK-NEXT: TypeName: struct B +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 + !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_arg_vector_of_ptr +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: Align: 16 +; CHECK-NEXT: Kind: ByValue +; CHECK-NEXT: ValueType: I32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: TypeName: 'global int* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: 
HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 + !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_arg_unknown_builtin_type +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: clk_event_t +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_arg_unknown_builtin_type( + %opencl.clk_event_t addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 + !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: - Name: test_pointee_align +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: GlobalBuffer +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; 
CHECK-NEXT: TypeName: 'long *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 1 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 2 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char2 *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char3 *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char4 *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 8 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char8 *' +; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: Kind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 16 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: TypeName: 'char16 *' +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: Kind: 
HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, + i8 addrspace(3)* %b, + <2 x i8> addrspace(3)* %c, + <3 x i8> addrspace(3)* %d, + <4 x i8> addrspace(3)* %e, + <8 x i8> addrspace(3)* %f, + <16 x i8> addrspace(3)* %g) + !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 + !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} + +!llvm.printf.fmts = !{!100, !101} + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"int"} +!4 = !{!""} +!5 = !{i32 undef, i32 1} +!6 = !{i32 1, i32 2, i32 4} +!7 = !{<4 x i32> undef, i32 0} +!8 = !{i32 8, i32 16, i32 32} +!9 = !{!"char"} +!10 = !{!"ushort2"} +!11 = !{!"int3"} +!12 = !{!"ulong4"} +!13 = !{!"half8"} +!14 = !{!"float16"} +!15 = !{!"double16"} +!16 = !{!"int *"} +!17 = !{!"image2d_t"} +!18 = !{!"sampler_t"} +!19 = !{!"queue_t"} +!20 = !{!"struct A"} +!21 = !{!"i128"} +!22 = !{i32 0, i32 0, i32 0} +!23 = !{!"none", !"none", !"none"} +!24 = !{!"int", !"short2", !"char3"} +!25 = !{!"", !"", !""} +!26 = !{half undef, i32 1} +!27 = !{float undef, i32 1} +!28 = !{double undef, i32 1} +!29 = !{i8 undef, i32 1} +!30 = !{i16 undef, i32 1} +!31 = !{i64 undef, i32 1} +!32 = !{i32 *undef, i32 1} +!50 = !{i32 1, i32 2, i32 3} +!51 = !{!"int *", !"int *", !"int *"} +!60 = !{i32 1, i32 1, i32 1} +!61 = !{!"read_only", !"write_only", !"read_write"} +!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} +!70 = !{!"volatile", !"const restrict", !"pipe"} +!80 = !{!"int **"} +!81 = !{i32 1} +!82 = !{!"struct B"} +!83 = !{!"global int* __attribute__((ext_vector_type(2)))"} +!84 = !{!"clk_event_t"} +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0} +!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} +!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} +!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"} +!94 = !{!"", !"", !"", !"", !"", !"", !""} +!100 = 
!{!"1:1:4:%d\5Cn"} +!101 = !{!"2:1:8:%g\5Cn"} + +; NOTES: Displaying notes found at file offset 0x{{[0-9]+}} +; NOTES-NEXT: Owner Data size Description +; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001) +; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003) +; GFX700: AMD 0x000078b2 Unknown note type: (0x0000000a) +; GFX800: AMD 0x000078b2 Unknown note type: (0x0000000a) +; GFX900: AMD 0x000078b3 Unknown note type: (0x0000000a) + +; PARSER: AMDGPU Code Object Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] +; CHECK: Isa: +; CHECK: WavefrontSize: 64 +; CHECK: LocalMemorySize: 65536 +; CHECK: EUsPerCU: 4 +; CHECK: MaxWavesPerEU: 10 +; CHECK: MaxFlatWorkGroupSize: 2048 +; CHECK: SGPRAllocGranule: 8 +; CHECK: TotalNumSGPRs: 512 +; CHECK: AddressableNumSGPRs: 104 +; CHECK: VGPRAllocGranule: 4 +; CHECK: TotalNumVGPRs: 256 +; CHECK: AddressableNumVGPRs: 256 +; CHECK: ... + +!opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. 
+ +; CHECK: --- +; CHECK: Version: [ 1, 0 ] +; CHECK: Isa: +; CHECK: WavefrontSize: 64 +; CHECK: LocalMemorySize: 65536 +; CHECK: EUsPerCU: 4 +; CHECK: MaxWavesPerEU: 10 +; CHECK: MaxFlatWorkGroupSize: 2048 +; CHECK: SGPRAllocGranule: 8 +; CHECK: TotalNumSGPRs: 512 +; CHECK: AddressableNumSGPRs: 104 +; CHECK: VGPRAllocGranule: 4 +; CHECK: TotalNumVGPRs: 256 +; CHECK: AddressableNumVGPRs: 256 +; CHECK: ... + +!opencl.ocl.version = !{!0} +!0 = !{} Index: test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] +; CHECK: Isa: +; CHECK: WavefrontSize: 64 +; CHECK: LocalMemorySize: 65536 +; CHECK: EUsPerCU: 4 +; CHECK: MaxWavesPerEU: 10 +; CHECK: MaxFlatWorkGroupSize: 2048 +; CHECK: SGPRAllocGranule: 8 +; CHECK: TotalNumSGPRs: 512 +; CHECK: AddressableNumSGPRs: 104 +; CHECK: VGPRAllocGranule: 4 +; CHECK: TotalNumVGPRs: 256 +; CHECK: AddressableNumVGPRs: 256 +; CHECK: ... 
+ +!opencl.ocl.version = !{!0} +!0 = !{i32 1} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s -; check llc does not crash for invalid opencl version metadata - -; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } - -!opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s -; check llc does not crash for invalid opencl version metadata - -; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } - -!opencl.ocl.version = !{!0} -!0 = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc 
-mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s -; check llc does not crash for invalid opencl version metadata - -; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } - -!opencl.ocl.version = !{!0} -!0 = !{i32 1} Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ /dev/null @@ -1,406 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s - -%struct.A = type { i8, float } -%opencl.image1d_t = type opaque -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque -%opencl.queue_t = type opaque -%opencl.pipe_t = type opaque -%struct.B = type { i32 addrspace(1)*} -%opencl.clk_event_t = type opaque - -; CHECK: --- -; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, 
amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: -; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -; CHECK: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, 
amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_int3, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_ulong4, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 32, amd.ArgAlign: 32, amd.ArgKind: 0, amd.ArgValueType: 10, amd.ArgTypeName: ulong4, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_half8, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], 
amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 5, amd.ArgTypeName: half8, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_float16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 64, amd.ArgAlign: 64, amd.ArgKind: 0, amd.ArgValueType: 8, amd.ArgTypeName: float16, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_double16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 128, amd.ArgAlign: 128, amd.ArgKind: 0, amd.ArgValueType: 11, amd.ArgTypeName: double16, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; 
CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_pointer, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_image, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) 
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_sampler, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 3, amd.ArgValueType: 6, amd.ArgTypeName: sampler_t, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_queue, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 6, amd.ArgValueType: 0, amd.ArgTypeName: queue_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_struct, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, 
amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct A, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_i128, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 8, amd.ArgKind: 0, amd.ArgValueType: 0, amd.ArgTypeName: i128, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_multi_arg, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 3, amd.ArgTypeName: short2, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: 
char3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_addr_space, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 2, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_type_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; 
CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsVolatile: 1 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsConst: 1, amd.ArgIsRestrict: 1 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 5, amd.ArgValueType: 0, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsPipe: 1 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_access_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image1d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 1 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 2 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image3d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 3 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, 
amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_half, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: half, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_float, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: float, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, 
amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_double, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: double, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: char, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { - 
ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_short, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: short, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_long, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: long, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_unknown, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: unknown, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 
0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_reqd_wgs_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.ReqdWorkGroupSize: [ 1, 2, 4 ], amd.VecTypeHint: int, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_wgs_hint_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.WorkGroupSizeHint: [ 8, 16, 32 ], amd.VecTypeHint: uint4, amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, 
amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_arg_ptr_to_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int **', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { - ret void -} -; CHECK-NEXT: - { amd.KernelName: test_arg_struct_contains_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct B, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, 
amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_arg_vector_of_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: 'global int* __attribute__((ext_vector_type(2)))', amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { - ret void -} - - -; CHECK-NEXT: - { amd.KernelName: test_arg_unknown_builtin_type, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: clk_event_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -define amdgpu_kernel void 
@test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK-NEXT: - { amd.KernelName: test_pointee_align, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 9, amd.ArgTypeName: 'long *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 1, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 2, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char2 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char3 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char4 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 8, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char8 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 16, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char16 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } } -define amdgpu_kernel void 
@test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { - ret void -} - -; CHECK-NEXT:... - -; PARSER: AMDGPU runtime metadata parser test passes. - -; NOTES: Displaying notes found at file offset 0x{{[0-9]+}} -; NOTES-NEXT: Owner Data size Description -; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001) -; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003) - -; SI: AMD 0x0000530d Unknown note type: (0x00000008) -; VI: AMD 0x0000530e Unknown note type: (0x00000008) - -!llvm.printf.fmts = !{!100, !101} - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"int"} -!4 = !{!""} -!5 = !{i32 undef, i32 1} -!6 = !{i32 1, i32 2, i32 4} -!7 = !{<4 x i32> undef, i32 0} -!8 = !{i32 8, i32 16, i32 32} -!9 = !{!"char"} -!10 = !{!"ushort2"} -!11 = !{!"int3"} -!12 = !{!"ulong4"} -!13 = !{!"half8"} -!14 = !{!"float16"} -!15 = !{!"double16"} -!16 = !{!"int *"} -!17 = !{!"image2d_t"} -!18 = !{!"sampler_t"} -!19 = !{!"queue_t"} -!20 = !{!"struct A"} -!21 = !{!"i128"} -!22 = !{i32 0, i32 0, i32 0} -!23 = !{!"none", !"none", !"none"} -!24 = !{!"int", !"short2", !"char3"} -!25 = !{!"", !"", !""} -!26 = !{half undef, i32 1} -!27 = !{float undef, i32 1} -!28 = !{double undef, i32 1} -!29 = !{i8 undef, i32 1} -!30 = !{i16 undef, i32 1} -!31 = !{i64 undef, i32 1} -!32 = !{i32 *undef, i32 1} -!50 = !{i32 1, i32 2, i32 3} -!51 = !{!"int *", !"int *", !"int *"} -!60 = !{i32 1, i32 1, i32 1} -!61 = !{!"read_only", !"write_only", !"read_write"} -!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} -!70 = !{!"volatile", !"const restrict", !"pipe"} -!80 = !{!"int **"} -!81 = !{i32 1} -!82 = !{!"struct B"} -!83 = !{!"global int* __attribute__((ext_vector_type(2)))"} -!84 = !{!"clk_event_t"} -!opencl.ocl.version = !{!90} -!90 = 
!{i32 2, i32 0} -!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} -!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"} -!94 = !{!"", !"", !"", !"", !"", !"", !""} -!100 = !{!"1:1:4:%d\5Cn"} -!101 = !{!"2:1:8:%g\5Cn"} Index: test/MC/AMDGPU/code-object-metadata-isa.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/code-object-metadata-isa.s @@ -0,0 +1,98 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_code_object_metadata +// CHECK: Version: [ 1, 0 ] +// CHECK: Isa: +// CHECK: WavefrontSize: 64 +// CHECK: LocalMemorySize: 65536 +// CHECK: EUsPerCU: 4 +// CHECK: MaxWavesPerEU: 10 +// CHECK: MaxFlatWorkGroupSize: 2048 +// GFX700: SGPRAllocGranule: 8 +// GFX800: SGPRAllocGranule: 16 +// GFX900: SGPRAllocGranule: 16 +// GFX700: TotalNumSGPRs: 512 +// GFX800: TotalNumSGPRs: 800 +// GFX900: TotalNumSGPRs: 800 +// GFX700: AddressableNumSGPRs: 104 +// GFX800: AddressableNumSGPRs: 96 +// GFX900: AddressableNumSGPRs: 102 +// CHECK: VGPRAllocGranule: 4 +// CHECK: TotalNumVGPRs: 256 +// CHECK: AddressableNumVGPRs: 256 +// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] +// CHECK: Kernels: +// CHECK: - Name: test_kernel +// CHECK: Language: OpenCL C +// CHECK: LanguageVersion: [ 2, 0 ] +// CHECK: Args: +// CHECK: - Size: 1 +// CHECK: Align: 1 +// CHECK: Kind: ByValue +// CHECK: ValueType: I8 +// CHECK: AccQual: Default +// CHECK: TypeName: char +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetX +// CHECK: ValueType: I64 
+// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetY +// CHECK: ValueType: I64 +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetZ +// CHECK: ValueType: I64 +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenPrintfBuffer +// CHECK: ValueType: I8 +// CHECK: AddrSpaceQual: Global +// CHECK: .end_amdgpu_code_object_metadata +.amdgpu_code_object_metadata + Version: [ 1, 0 ] + Isa: + WavefrontSize: 1 + LocalMemorySize: 1 + EUsPerCU: 1 + MaxWavesPerEU: 1 + MaxFlatWorkGroupSize: 1 + SGPRAllocGranule: 1 + TotalNumSGPRs: 1 + AddressableNumSGPRs: 1 + VGPRAllocGranule: 1 + TotalNumVGPRs: 1 + AddressableNumVGPRs: 1 + Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + Kernels: + - Name: test_kernel + Language: OpenCL C + LanguageVersion: [ 2, 0 ] + Args: + - Size: 1 + Align: 1 + Kind: ByValue + ValueType: I8 + AccQual: Default + TypeName: char + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetX + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetY + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetZ + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenPrintfBuffer + ValueType: I8 + AddrSpaceQual: Global +.end_amdgpu_code_object_metadata Index: test/MC/AMDGPU/code-object-metadata-kernel-args.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/code-object-metadata-kernel-args.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_code_object_metadata +// CHECK: Version: [ 1, 0 ] +// CHECK: Isa: +// CHECK: WavefrontSize: 64 +// CHECK: LocalMemorySize: 65536 +// CHECK: 
EUsPerCU: 4 +// CHECK: MaxWavesPerEU: 10 +// CHECK: MaxFlatWorkGroupSize: 2048 +// GFX700: SGPRAllocGranule: 8 +// GFX800: SGPRAllocGranule: 16 +// GFX900: SGPRAllocGranule: 16 +// GFX700: TotalNumSGPRs: 512 +// GFX800: TotalNumSGPRs: 800 +// GFX900: TotalNumSGPRs: 800 +// GFX700: AddressableNumSGPRs: 104 +// GFX800: AddressableNumSGPRs: 96 +// GFX900: AddressableNumSGPRs: 102 +// CHECK: VGPRAllocGranule: 4 +// CHECK: TotalNumVGPRs: 256 +// CHECK: AddressableNumVGPRs: 256 +// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] +// CHECK: Kernels: +// CHECK: - Name: test_kernel +// CHECK: Language: OpenCL C +// CHECK: LanguageVersion: [ 2, 0 ] +// CHECK: Args: +// CHECK: - Size: 1 +// CHECK: Align: 1 +// CHECK: Kind: ByValue +// CHECK: ValueType: I8 +// CHECK: AccQual: Default +// CHECK: TypeName: char +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetX +// CHECK: ValueType: I64 +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetY +// CHECK: ValueType: I64 +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenGlobalOffsetZ +// CHECK: ValueType: I64 +// CHECK: - Size: 8 +// CHECK: Align: 8 +// CHECK: Kind: HiddenPrintfBuffer +// CHECK: ValueType: I8 +// CHECK: AddrSpaceQual: Global +// CHECK: .end_amdgpu_code_object_metadata +.amdgpu_code_object_metadata + Version: [ 1, 0 ] + Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + Kernels: + - Name: test_kernel + Language: OpenCL C + LanguageVersion: [ 2, 0 ] + Args: + - Size: 1 + Align: 1 + Kind: ByValue + ValueType: I8 + AccQual: Default + TypeName: char + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetX + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetY + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetZ + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenPrintfBuffer + ValueType: I8 + AddrSpaceQual: Global +.end_amdgpu_code_object_metadata Index: test/MC/AMDGPU/code-object-metadata-kernel-attrs.s 
=================================================================== --- /dev/null +++ test/MC/AMDGPU/code-object-metadata-kernel-attrs.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_code_object_metadata +// CHECK: Version: [ 1, 0 ] +// CHECK: Isa: +// CHECK: WavefrontSize: 64 +// CHECK: LocalMemorySize: 65536 +// CHECK: EUsPerCU: 4 +// CHECK: MaxWavesPerEU: 10 +// CHECK: MaxFlatWorkGroupSize: 2048 +// GFX700: SGPRAllocGranule: 8 +// GFX800: SGPRAllocGranule: 16 +// GFX900: SGPRAllocGranule: 16 +// GFX700: TotalNumSGPRs: 512 +// GFX800: TotalNumSGPRs: 800 +// GFX900: TotalNumSGPRs: 800 +// GFX700: AddressableNumSGPRs: 104 +// GFX800: AddressableNumSGPRs: 96 +// GFX900: AddressableNumSGPRs: 102 +// CHECK: VGPRAllocGranule: 4 +// CHECK: TotalNumVGPRs: 256 +// CHECK: AddressableNumVGPRs: 256 +// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] +// CHECK: Kernels: +// CHECK: - Name: test_kernel +// CHECK: Language: OpenCL C +// CHECK: LanguageVersion: [ 2, 0 ] +// CHECK: Attrs: +// CHECK: ReqdWorkGroupSize: [ 1, 2, 4 ] +// CHECK: WorkGroupSizeHint: [ 8, 16, 32 ] +// CHECK: VecTypeHint: int +// CHECK: .end_amdgpu_code_object_metadata +.amdgpu_code_object_metadata + Version: [ 1, 0 ] + Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + Kernels: + - Name: test_kernel + Language: OpenCL C + LanguageVersion: [ 2, 0 ] + Attrs: + ReqdWorkGroupSize: [ 1, 2, 4 ] + WorkGroupSizeHint: [ 8, 16, 32 ] + VecTypeHint: int +.end_amdgpu_code_object_metadata Index: test/MC/AMDGPU/code-object-metadata-unknown-key.s =================================================================== --- /dev/null +++ 
test/MC/AMDGPU/code-object-metadata-unknown-key.s @@ -0,0 +1,41 @@ +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s + +// CHECK: error: unknown key 'UnknownKey' +.amdgpu_code_object_metadata + UnknownKey: [ 2, 0 ] + Version: [ 1, 0 ] + Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] + Kernels: + - Name: test_kernel + Language: OpenCL C + LanguageVersion: [ 2, 0 ] + Args: + - Size: 1 + Align: 1 + Kind: ByValue + ValueType: I8 + AccQual: Default + TypeName: char + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetX + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetY + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenGlobalOffsetZ + ValueType: I64 + - Size: 8 + Align: 8 + Kind: HiddenPrintfBuffer + ValueType: I8 + AddrSpaceQual: Global +.end_amdgpu_code_object_metadata Index: test/MC/AMDGPU/hsa.s =================================================================== --- test/MC/AMDGPU/hsa.s +++ test/MC/AMDGPU/hsa.s @@ -37,25 +37,31 @@ .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -.amdgpu_runtime_metadata - { - amd.MDVersion: [ 2, 0 ] - amd.Kernels: [ - { amd.KernelName: amd_kernel_code_t_test_all }, - { amd.KernelName: amd_kernel_code_t_minimal } - ] - } -.end_amdgpu_runtime_metadata +.amdgpu_code_object_metadata + Version: [ 3, 0 ] + Kernels: + - Name: amd_kernel_code_t_test_all + - Name: amd_kernel_code_t_minimal +.end_amdgpu_code_object_metadata -// ASM: .amdgpu_runtime_metadata -// ASM: { -// ASM: amd.MDVersion: [ 2, 0 ] -// ASM: 
amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, -// ASM: amd.Kernels: -// ASM: - { amd.KernelName: amd_kernel_code_t_test_all } -// ASM: - { amd.KernelName: amd_kernel_code_t_minimal } -// ASM: } -// ASM: .end_amdgpu_runtime_metadata +// ASM: .amdgpu_code_object_metadata +// ASM: Version: [ 3, 0 ] +// ASM: Isa: +// ASM: WavefrontSize: 64 +// ASM: LocalMemorySize: 65536 +// ASM: EUsPerCU: 4 +// ASM: MaxWavesPerEU: 10 +// ASM: MaxFlatWorkGroupSize: 2048 +// ASM: SGPRAllocGranule: 8 +// ASM: TotalNumSGPRs: 512 +// ASM: AddressableNumSGPRs: 104 +// ASM: VGPRAllocGranule: 4 +// ASM: TotalNumVGPRs: 256 +// ASM: AddressableNumVGPRs: 256 +// ASM: Kernels: +// ASM: - Name: amd_kernel_code_t_test_all +// ASM: - Name: amd_kernel_code_t_minimal +// ASM: .end_amdgpu_code_object_metadata .amdgpu_hsa_kernel amd_kernel_code_t_test_all .amdgpu_hsa_kernel amd_kernel_code_t_minimal Index: test/MC/AMDGPU/runtime-metadata-1.s =================================================================== --- test/MC/AMDGPU/runtime-metadata-1.s +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700 -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800 -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900 - -.amdgpu_runtime_metadata - { amd.MDVersion: [ 2, 1 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { 
amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - } -.end_amdgpu_runtime_metadata - -// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GFX900: 
{ amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } } Index: test/MC/AMDGPU/runtime-metadata-2.s =================================================================== --- test/MC/AMDGPU/runtime-metadata-2.s +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN 
--check-prefix=GFX700 -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800 -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900 - -.amdgpu_runtime_metadata - { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - } -.end_amdgpu_runtime_metadata - -// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, 
amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - -// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -// GCN-NEXT: - { amd.KernelName: 
test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } } Index: test/MC/AMDGPU/runtime-metadata-invalid-1.s =================================================================== --- test/MC/AMDGPU/runtime-metadata-invalid-1.s +++ /dev/null @@ -1,106 +0,0 @@ -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s -; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s - -; CHECK: error: unknown key 'amd.RandomUnknownKey' - - .text - .hsa_code_object_version 2,1 - .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" - .amdgpu_runtime_metadata ---- -{ amd.MDVersion: [ 2, 1 ], amd.RandomUnknownKey, amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.Kernels: - - { amd.KernelName: test, amd.Language: OpenCL C, 
amd.LanguageVersion: [ 1, 0 ], amd.Args: - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int*', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } } } -... - - .end_amdgpu_runtime_metadata - .globl test - .p2align 8 - .type test,@function - .amdgpu_hsa_kernel test -test: ; @test - .amd_kernel_code_t - amd_code_version_major = 1 - amd_code_version_minor = 0 - amd_machine_kind = 1 - amd_machine_version_major = 8 - amd_machine_version_minor = 0 - amd_machine_version_stepping = 3 - kernel_code_entry_byte_offset = 256 - kernel_code_prefetch_byte_size = 0 - max_scratch_backing_memory_byte_size = 0 - granulated_workitem_vgpr_count = 0 - granulated_wavefront_sgpr_count = 0 - priority = 0 - float_mode = 192 - priv = 0 - enable_dx10_clamp = 1 - debug_mode = 0 - enable_ieee_mode = 1 - enable_sgpr_private_segment_wave_byte_offset = 0 - user_sgpr_count = 6 - enable_trap_handler = 1 - enable_sgpr_workgroup_id_x = 1 - enable_sgpr_workgroup_id_y = 0 - enable_sgpr_workgroup_id_z = 0 - enable_sgpr_workgroup_info = 0 - enable_vgpr_workitem_id = 0 - enable_exception_msb = 0 - granulated_lds_size = 0 - enable_exception = 0 - enable_sgpr_private_segment_buffer = 1 - enable_sgpr_dispatch_ptr = 0 - enable_sgpr_queue_ptr = 0 - enable_sgpr_kernarg_segment_ptr = 1 - enable_sgpr_dispatch_id = 0 - enable_sgpr_flat_scratch_init = 0 - enable_sgpr_private_segment_size = 0 - enable_sgpr_grid_workgroup_count_x = 0 - enable_sgpr_grid_workgroup_count_y = 0 - enable_sgpr_grid_workgroup_count_z = 0 - enable_ordered_append_gds = 0 - private_element_size = 1 - is_ptr64 = 1 - is_dynamic_callstack = 0 - is_debug_enabled = 0 - is_xnack_enabled = 0 - workitem_private_segment_byte_size = 0 - workgroup_group_segment_byte_size = 0 - 
gds_segment_byte_size = 0 - kernarg_segment_byte_size = 8 - workgroup_fbarrier_count = 0 - wavefront_sgpr_count = 6 - workitem_vgpr_count = 3 - reserved_vgpr_first = 0 - reserved_vgpr_count = 0 - reserved_sgpr_first = 0 - reserved_sgpr_count = 0 - debug_wavefront_private_segment_offset_sgpr = 0 - debug_private_segment_buffer_sgpr = 0 - kernarg_segment_alignment = 4 - group_segment_alignment = 4 - private_segment_alignment = 4 - wavefront_size = 6 - call_convention = -1 - runtime_loader_kernel_symbol = 0 - .end_amd_kernel_code_t -; BB#0: ; %entry - s_load_dwordx2 s[0:1], s[4:5], 0x0 - v_mov_b32_e32 v2, 0x309 - s_waitcnt lgkmcnt(0) - v_mov_b32_e32 v0, s0 - v_mov_b32_e32 v1, s1 - flat_store_dword v[0:1], v2 - s_endpgm -.Lfunc_end0: - .size test, .Lfunc_end0-test - - .ident "" - .section ".note.GNU-stack" Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -129,7 +129,7 @@ void printMipsReginfo() override; void printMipsOptions() override; - void printAMDGPURuntimeMD() override; + void printAMDGPUCodeObjectMetadata() override; void printStackMap() const override; @@ -2357,7 +2357,7 @@ } } -template void ELFDumper::printAMDGPURuntimeMD() { +template void ELFDumper::printAMDGPUCodeObjectMetadata() { const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note"); if (!Shdr) { W.startLine() << "There is no .note section in the file.\n"; @@ -2365,7 +2365,7 @@ } ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); - const uint32_t RuntimeMDNoteType = 8; + const uint32_t RuntimeMDNoteType = 10; for (auto I = reinterpret_cast(&Sec[0]), E = I + Sec.size()/4; I != E;) { uint32_t NameSZ = I[0]; Index: tools/llvm-readobj/ObjDumper.h =================================================================== --- tools/llvm-readobj/ObjDumper.h +++ tools/llvm-readobj/ObjDumper.h @@ -59,7 +59,7 @@ virtual void printMipsOptions() { } // Only implemented for 
AMDGPU ELF at this time. - virtual void printAMDGPURuntimeMD() {} + virtual void printAMDGPUCodeObjectMetadata() {} // Only implemented for PE/COFF. virtual void printCOFFImports() { } Index: tools/llvm-readobj/llvm-readobj.cpp =================================================================== --- tools/llvm-readobj/llvm-readobj.cpp +++ tools/llvm-readobj/llvm-readobj.cpp @@ -186,9 +186,10 @@ cl::opt MipsOptions("mips-options", cl::desc("Display the MIPS .MIPS.options section")); - // -amdgpu-runtime-metadata - cl::opt AMDGPURuntimeMD("amdgpu-runtime-metadata", - cl::desc("Display AMDGPU runtime metadata")); + // -amdgpu-code-object-metadata + cl::opt AMDGPUCodeObjectMetadata( + "amdgpu-code-object-metadata", + cl::desc("Display AMDGPU code object metadata")); // -coff-imports cl::opt @@ -422,8 +423,8 @@ Dumper->printMipsOptions(); } if (Obj->getArch() == llvm::Triple::amdgcn) - if (opts::AMDGPURuntimeMD) - Dumper->printAMDGPURuntimeMD(); + if (opts::AMDGPUCodeObjectMetadata) + Dumper->printAMDGPUCodeObjectMetadata(); if (opts::SectionGroups) Dumper->printGroupSections(); if (opts::HashHistogram)