Index: include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h =================================================================== --- /dev/null +++ include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h @@ -0,0 +1,70 @@ +//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is a verifier for AMDGPU HSA metadata, which can verify both +/// well-typed metadata and untyped metadata. When verifying in the non-strict +/// mode, untyped metadata is coerced into the correct type if possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H +#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H + +#include "llvm/BinaryFormat/MsgPackTypes.h" + +namespace llvm { +namespace AMDGPU { +namespace HSAMD { +namespace V3 { + +/// Verifier for AMDGPU HSA metadata. +/// +/// Operates in two modes: +/// +/// In strict mode, metadata must already be well-typed. +/// +/// In non-strict mode, metadata is coerced into expected types when possible. +class MetadataVerifier { + bool Strict; + + bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue = {}); + bool verifyInteger(msgpack::Node &Node); + bool verifyArray(msgpack::Node &Node, + function_ref verifyNode, + Optional Size = None); + bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required, + function_ref verifyNode); + bool + verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required, + msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue = {}); + bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key, + bool Required); + bool verifyKernelArgs(msgpack::Node &Node); + bool verifyKernel(msgpack::Node &Node); + +public: + /// Construct a MetadataVerifier, specifying whether it will operate in \p + /// Strict mode. + MetadataVerifier(bool Strict) : Strict(Strict) {} + + /// Verify given HSA metadata. + /// + /// \returns True when successful, false when metadata is invalid. + bool verify(msgpack::Node &HSAMetadataRoot); +}; + +} // end namespace V3 +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H Index: include/llvm/BinaryFormat/ELF.h =================================================================== --- include/llvm/BinaryFormat/ELF.h +++ include/llvm/BinaryFormat/ELF.h @@ -1325,7 +1325,7 @@ GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1 }; -// AMDGPU specific notes. +// AMD specific notes. (Code Object V2) enum { // Note types with values between 0 and 9 (inclusive) are reserved. NT_AMD_AMDGPU_HSA_METADATA = 10, @@ -1333,6 +1333,12 @@ NT_AMD_AMDGPU_PAL_METADATA = 12 }; +// AMDGPU specific notes. (Code Object V3) +enum { + // Note types with values between 0 and 31 (inclusive) are reserved. + NT_AMDGPU_METADATA = 32 +}; + enum { GNU_ABI_TAG_LINUX = 0, GNU_ABI_TAG_HURD = 1, Index: include/llvm/Support/AMDGPUMetadata.h =================================================================== --- include/llvm/Support/AMDGPUMetadata.h +++ include/llvm/Support/AMDGPUMetadata.h @@ -431,6 +431,21 @@ /// Converts \p HSAMetadata to \p String. std::error_code toString(Metadata HSAMetadata, std::string &String); +//===----------------------------------------------------------------------===// +// HSA metadata for v3 code object. +//===----------------------------------------------------------------------===// +namespace V3 { +/// HSA metadata major version. +constexpr uint32_t VersionMajor = 1; +/// HSA metadata minor version. +constexpr uint32_t VersionMinor = 0; + +/// HSA metadata beginning assembler directive. +constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata"; +/// HSA metadata ending assembler directive. +constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata"; +} // end namespace V3 + } // end namespace HSAMD //===----------------------------------------------------------------------===// Index: lib/BinaryFormat/AMDGPUMetadataVerifier.cpp =================================================================== --- /dev/null +++ lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -0,0 +1,324 @@ +//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Implements a verifier for AMDGPU HSA metadata. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" +#include "llvm/Support/AMDGPUMetadata.h" + +namespace llvm { +namespace AMDGPU { +namespace HSAMD { +namespace V3 { + +bool MetadataVerifier::verifyScalar( + msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue) { + auto ScalarPtr = dyn_cast(&Node); + if (!ScalarPtr) + return false; + auto &Scalar = *ScalarPtr; + // Do not output extraneous tags for types we know from the spec. + Scalar.IgnoreTag = true; + if (Scalar.getScalarKind() != SKind) { + if (Strict) + return false; + // If we are not strict, we interpret string values as "implicitly typed" + // and attempt to coerce them to the expected type here. + if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String) + return false; + std::string StringValue = Scalar.getString(); + Scalar.setScalarKind(SKind); + if (Scalar.inputYAML(StringValue) != StringRef()) + return false; + } + if (verifyValue) + return verifyValue(Scalar); + return true; +} + +bool MetadataVerifier::verifyInteger(msgpack::Node &Node) { + if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt)) + if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int)) + return false; + return true; +} + +bool MetadataVerifier::verifyArray( + msgpack::Node &Node, function_ref verifyNode, + Optional Size) { + auto ArrayPtr = dyn_cast(&Node); + if (!ArrayPtr) + return false; + auto &Array = *ArrayPtr; + if (Size && Array.size() != *Size) + return false; + for (auto &Item : Array) + if (!verifyNode(*Item.get())) + return false; + + return true; +} + +bool MetadataVerifier::verifyEntry( + msgpack::MapNode &MapNode, StringRef Key, bool Required, + function_ref verifyNode) { + auto Entry = MapNode.find(Key); + if (Entry == MapNode.end()) + return !Required; + return verifyNode(*Entry->second.get()); +} + +bool MetadataVerifier::verifyScalarEntry( + msgpack::MapNode &MapNode, StringRef Key, bool Required, + msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue) { + return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) { + return verifyScalar(Node, SKind, verifyValue); + }); +} + +bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode, + StringRef Key, bool Required) { + return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) { + return verifyInteger(Node); + }); +} + +bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) { + auto ArgsMapPtr = dyn_cast(&Node); + if (!ArgsMapPtr) + return false; + auto &ArgsMap = *ArgsMapPtr; + + if (!verifyScalarEntry(ArgsMap, ".name", false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(ArgsMap, ".type_name", false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyIntegerEntry(ArgsMap, ".size", true)) + return false; + if (!verifyIntegerEntry(ArgsMap, ".offset", true)) + return false; + if (!verifyScalarEntry(ArgsMap, ".value_kind", true, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("by_value", true) + .Case("global_buffer", true) + .Case("dynamic_shared_pointer", true) + .Case("sampler", true) + .Case("image", true) + .Case("pipe", true) + .Case("queue", true) + .Case("hidden_global_offset_x", true) + .Case("hidden_global_offset_y", true) + .Case("hidden_global_offset_z", true) + .Case("hidden_none", true) + .Case("hidden_printf_buffer", true) + .Case("hidden_default_queue", true) + .Case("hidden_completion_action", true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, ".value_type", true, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("struct", true) + .Case("i8", true) + .Case("u8", true) + .Case("i16", true) + .Case("u16", true) + .Case("f16", true) + .Case("i32", true) + .Case("u32", true) + .Case("f32", true) + .Case("i64", true) + .Case("u64", true) + .Case("f64", true) + .Default(false); + })) + return false; + if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false)) + return false; + if (!verifyScalarEntry(ArgsMap, ".address_space", false, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("private", true) + .Case("global", true) + .Case("constant", true) + .Case("local", true) + .Case("generic", true) + .Case("region", true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, ".access", false, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("read_only", true) + .Case("write_only", true) + .Case("read_write", true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, ".actual_access", false, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("read_only", true) + .Case("write_only", true) + .Case("read_write", true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, ".is_const", false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, ".is_restrict", false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, ".is_volatile", false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, ".is_pipe", false, + msgpack::ScalarNode::SK_Boolean)) + return false; + + return true; +} + +bool MetadataVerifier::verifyKernel(msgpack::Node &Node) { + auto KernelMapPtr = dyn_cast(&Node); + if (!KernelMapPtr) + return false; + auto &KernelMap = *KernelMapPtr; + + if (!verifyScalarEntry(KernelMap, ".name", true, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, ".symbol", true, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, ".language", false, + msgpack::ScalarNode::SK_String, + [](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case("OpenCL C", true) + .Case("OpenCL C++", true) + .Case("HCC", true) + .Case("HIP", true) + .Case("OpenMP", true) + .Case("Assembler", true) + .Default(false); + })) + return false; + if (!verifyEntry( + KernelMap, ".language_version", false, [this](msgpack::Node &Node) { + return verifyArray( + Node, + [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2); + })) + return false; + if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) { + return verifyArray(Node, [this](msgpack::Node &Node) { + return verifyKernelArgs(Node); + }); + })) + return false; + if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false, + [this](msgpack::Node &Node) { + return verifyArray(Node, + [this](msgpack::Node &Node) { + return verifyInteger(Node); + }, + 3); + })) + return false; + if (!verifyEntry(KernelMap, ".workgroup_size_hint", false, + [this](msgpack::Node &Node) { + return verifyArray(Node, + [this](msgpack::Node &Node) { + return verifyInteger(Node); + }, + 3); + })) + return false; + if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true)) + return false; + if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false)) + return false; + if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false)) + return false; + + return true; +} + +bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) { + auto RootMapPtr = dyn_cast(&HSAMetadataRoot); + if (!RootMapPtr) + return false; + auto &RootMap = *RootMapPtr; + + if (!verifyEntry( + RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) { + return verifyArray( + Node, + [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2); + })) + return false; + if (!verifyEntry( + RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) { + return verifyArray(Node, [this](msgpack::Node &Node) { + return verifyScalar(Node, msgpack::ScalarNode::SK_String); + }); + })) + return false; + if (!verifyEntry(RootMap, "amdhsa.kernels", true, + [this](msgpack::Node &Node) { + return verifyArray(Node, [this](msgpack::Node &Node) { + return verifyKernel(Node); + }); + })) + return false; + + return true; +} + +} // end namespace V3 +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm Index: lib/BinaryFormat/CMakeLists.txt =================================================================== --- lib/BinaryFormat/CMakeLists.txt +++ lib/BinaryFormat/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMBinaryFormat + AMDGPUMetadataVerifier.cpp Dwarf.cpp Magic.cpp MsgPackReader.cpp Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -56,7 +56,7 @@ SIProgramInfo CurrentProgramInfo; DenseMap CallGraphResourceInfo; - AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream; + AMDGPU::HSAMD::MetadataStreamer *HSAMetadataStream; std::map PALMetadataMap; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; @@ -92,6 +92,7 @@ public: explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); + ~AMDGPUAsmPrinter(); StringRef getPassName() const override; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -46,6 +46,7 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; // TODO: This should get the default rounding mode from the kernel. We just set // the default here, but this could change if the OpenCL rounding mode pragmas @@ -99,8 +100,14 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) { + if (IsaInfo::hasCodeObjectV3(getSTI())) + HSAMetadataStream = new MetadataStreamerV3(); + else + HSAMetadataStream = new MetadataStreamerV2(); } +AMDGPUAsmPrinter::~AMDGPUAsmPrinter() { delete HSAMetadataStream; } + StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; } @@ -122,9 +129,6 @@ IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS); getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget); - - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; } if (TM.getTargetTriple().getOS() != Triple::AMDHSA && @@ -132,11 +136,14 @@ return; if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - HSAMetadataStream.begin(M); + HSAMetadataStream->begin(M); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) readPALMetadata(M); + if (IsaInfo::hasCodeObjectV3(getSTI())) + return; + // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. if (TM.getTargetTriple().getOS() == Triple::AMDHSA) getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); @@ -148,37 +155,38 @@ } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { - // TODO: Add metadata to code object v3. - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; - // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) return; - // Emit ISA Version (NT_AMD_AMDGPU_ISA). - std::string ISAVersionString; - raw_string_ostream ISAVersionStream(ISAVersionString); - IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); - getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit ISA Version (NT_AMD_AMDGPU_ISA). + std::string ISAVersionString; + raw_string_ostream ISAVersionStream(ISAVersionString); + IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); + getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + } // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { - HSAMetadataStream.end(); - getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata()); + HSAMetadataStream->end(); + bool Success = HSAMetadataStream->emitTo(*getTargetStreamer()); + (void)Success; + assert(Success && "Malformed HSA Metadata"); } - // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). - if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { - // Copy the PAL metadata from the map where we collected it into a vector, - // then write it as a .note. - PALMD::Metadata PALMetadataVector; - for (auto i : PALMetadataMap) { - PALMetadataVector.push_back(i.first); - PALMetadataVector.push_back(i.second); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + PALMD::Metadata PALMetadataVector; + for (auto i : PALMetadataMap) { + PALMetadataVector.push_back(i.first); + PALMetadataVector.push_back(i.second); + } + getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } - getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } } @@ -211,11 +219,8 @@ getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); } - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) - return; - - if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS()) - HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); + if (STM.isAmdHsaOS()) + HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo); } void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { Index: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h =================================================================== --- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -19,10 +19,12 @@ #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/Support/AMDGPUMetadata.h" namespace llvm { +class AMDGPUTargetStreamer; class Argument; class DataLayout; class Function; @@ -34,7 +36,92 @@ namespace AMDGPU { namespace HSAMD { -class MetadataStreamer final { +class MetadataStreamer { +public: + virtual ~MetadataStreamer(){}; + + virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; + + virtual void begin(const Module &Mod) = 0; + + virtual void end() = 0; + + virtual void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) = 0; +}; + +class MetadataStreamerV3 final : public MetadataStreamer { +private: + std::shared_ptr HSAMetadataRoot = + std::make_shared(); + + void dump(StringRef HSAMetadataString) const; + + void verify(StringRef HSAMetadataString) const; + + Optional getAccessQualifier(StringRef AccQual) const; + + Optional getAddressSpaceQualifier(unsigned AddressSpace) const; + + StringRef getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const; + + StringRef getValueType(Type *Ty, StringRef TypeName) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + std::shared_ptr + getWorkGroupDimensions(MDNode *Node) const; + + std::shared_ptr + getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const; + + void emitVersion(); + + void emitPrintf(const Module &Mod); + + void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode &Args); + + void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind, + unsigned &Offset, msgpack::ArrayNode &Args, + unsigned PointeeAlign = 0, StringRef Name = "", + StringRef TypeName = "", StringRef BaseTypeName = "", + StringRef AccQual = "", StringRef TypeQual = ""); + + void emitHiddenKernelArgs(const Function &Func, unsigned &Offset, + msgpack::ArrayNode &Args); + + std::shared_ptr &getRootMetadata(StringRef Key) { + return (*cast(HSAMetadataRoot.get()))[Key]; + } + + std::shared_ptr &getHSAMetadataRoot() { + return HSAMetadataRoot; + } + +public: + MetadataStreamerV3() = default; + ~MetadataStreamerV3() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; + + void end() override; + + void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) override; +}; + +class MetadataStreamerV2 final : public MetadataStreamer { private: Metadata HSAMetadata; @@ -44,7 +131,7 @@ AccessQualifier getAccessQualifier(StringRef AccQual) const; - AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const; + AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const; ValueKind getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const; @@ -82,19 +169,22 @@ void emitHiddenKernelArgs(const Function &Func); -public: - MetadataStreamer() = default; - ~MetadataStreamer() = default; - const Metadata &getHSAMetadata() const { return HSAMetadata; } - void begin(const Module &Mod); +public: + MetadataStreamerV2() = default; + ~MetadataStreamerV2() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; - void end(); + void end() override; - void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo); + void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) override; }; } // end namespace HSAMD Index: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -16,6 +16,7 @@ #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIMachineFunctionInfo.h" #include "SIProgramInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -36,11 +37,14 @@ namespace AMDGPU { namespace HSAMD { -void MetadataStreamer::dump(StringRef HSAMetadataString) const { +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV2 +//===----------------------------------------------------------------------===// +void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamer::verify(StringRef HSAMetadataString) const { +void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; HSAMD::Metadata FromHSAMetadataString; @@ -63,7 +67,8 @@ } } -AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { +AccessQualifier +MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { if (AccQual.empty()) return AccessQualifier::Unknown; @@ -74,7 +79,8 @@ .Default(AccessQualifier::Default); } -AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( +AddressSpaceQualifier +MetadataStreamerV2::getAddressSpaceQualifier( unsigned AddressSpace) const { switch (AddressSpace) { case AMDGPUAS::PRIVATE_ADDRESS: @@ -94,8 +100,8 @@ } } -ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, - StringRef BaseTypeName) const { +ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { if (TypeQual.find("pipe") != StringRef::npos) return ValueKind::Pipe; @@ -122,7 +128,7 @@ ValueKind::ByValue); } -ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { +ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { auto Signed = !TypeName.startswith("u"); @@ -154,7 +160,7 @@ } } -std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { +std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { if (!Signed) @@ -191,8 +197,8 @@ } } -std::vector MetadataStreamer::getWorkGroupDimensions( - MDNode *Node) const { +std::vector +MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { std::vector Dims; if (Node->getNumOperands() != 3) return Dims; @@ -202,9 +208,9 @@ return Dims; } -Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const { +Kernel::CodeProps::Metadata +MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); HSAMD::Kernel::CodeProps::Metadata HSACodeProps; @@ -231,9 +237,9 @@ return HSACodeProps; } -Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const { +Kernel::DebugProps::Metadata +MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget(); HSAMD::Kernel::DebugProps::Metadata HSADebugProps; @@ -253,14 +259,14 @@ return HSADebugProps; } -void MetadataStreamer::emitVersion() { +void MetadataStreamerV2::emitVersion() { auto &Version = HSAMetadata.mVersion; Version.push_back(VersionMajor); Version.push_back(VersionMinor); } -void MetadataStreamer::emitPrintf(const Module &Mod) { +void MetadataStreamerV2::emitPrintf(const Module &Mod) { auto &Printf = HSAMetadata.mPrintf; auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); @@ -272,7 +278,7 @@ Printf.push_back(cast(Op->getOperand(0))->getString()); } -void MetadataStreamer::emitKernelLanguage(const Function &Func) { +void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { auto &Kernel = HSAMetadata.mKernels.back(); // TODO: What about other languages? @@ -290,7 +296,7 @@ mdconst::extract(Op0->getOperand(1))->getZExtValue()); } -void MetadataStreamer::emitKernelAttrs(const Function &Func) { +void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { auto &Attrs = HSAMetadata.mKernels.back().mAttrs; if (auto Node = Func.getMetadata("reqd_work_group_size")) @@ -308,14 +314,14 @@ } } -void MetadataStreamer::emitKernelArgs(const Function &Func) { +void MetadataStreamerV2::emitKernelArgs(const Function &Func) { for (auto &Arg : Func.args()) emitKernelArg(Arg); emitHiddenKernelArgs(Func); } -void MetadataStreamer::emitKernelArg(const Argument &Arg) { +void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { auto Func = Arg.getParent(); auto ArgNo = Arg.getArgNo(); const MDNode *Node; @@ -368,12 +374,12 @@ PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); } -void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, - ValueKind ValueKind, - unsigned PointeeAlign, - StringRef Name, - StringRef TypeName, StringRef BaseTypeName, - StringRef AccQual, StringRef TypeQual) { +void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, + ValueKind ValueKind, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); @@ -386,7 +392,7 @@ Arg.mPointeeAlign = PointeeAlign; if (auto PtrTy = dyn_cast(Ty)) - Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); + Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); Arg.mAccQual = getAccessQualifier(AccQual); @@ -406,7 +412,7 @@ } } -void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { +void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { int HiddenArgNumBytes = getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); @@ -448,12 +454,16 @@ } } -void MetadataStreamer::begin(const Module &Mod) { +bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); +} + +void MetadataStreamerV2::begin(const Module &Mod) { emitVersion(); emitPrintf(Mod); } -void MetadataStreamer::end() { +void MetadataStreamerV2::end() { std::string HSAMetadataString; if (toString(HSAMetadata, HSAMetadataString)) return; @@ -464,7 +474,8 @@ verify(HSAMetadataString); } -void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { +void MetadataStreamerV2::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) return; @@ -484,6 +495,505 @@ HSAMetadata.mKernels.back().mDebugProps = DebugProps; } +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV3 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; + + std::shared_ptr FromHSAMetadataString = + std::make_shared(); + + yaml::Input YIn(HSAMetadataString); + YIn >> FromHSAMetadataString; + if (YIn.error()) { + errs() << "FAIL\n"; + return; + } + + std::string ToHSAMetadataString; + raw_string_ostream StrOS(ToHSAMetadataString); + yaml::Output YOut(StrOS); + YOut << FromHSAMetadataString; + + errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << StrOS.str() << '\n'; + } +} + +Optional +MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { + return StringSwitch>(AccQual) + .Case("read_only", StringRef("read_only")) + .Case("write_only", StringRef("write_only")) + .Case("read_write", StringRef("read_write")) + .Default(None); +} + +Optional +MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { + switch (AddressSpace) { + case AMDGPUAS::PRIVATE_ADDRESS: + return StringRef("private"); + case AMDGPUAS::GLOBAL_ADDRESS: + return StringRef("global"); + case AMDGPUAS::CONSTANT_ADDRESS: + return StringRef("constant"); + case AMDGPUAS::LOCAL_ADDRESS: + return StringRef("local"); + case AMDGPUAS::FLAT_ADDRESS: + return StringRef("generic"); + case AMDGPUAS::REGION_ADDRESS: + return StringRef("region"); + default: + return None; + } +} + +StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return "pipe"; + + return StringSwitch(BaseTypeName) + .Case("image1d_t", "image") + .Case("image1d_array_t", "image") + .Case("image1d_buffer_t", "image") + .Case("image2d_t", "image") + .Case("image2d_array_t", "image") + .Case("image2d_array_depth_t", "image") + .Case("image2d_array_msaa_t", "image") + .Case("image2d_array_msaa_depth_t", "image") + .Case("image2d_depth_t", "image") + .Case("image2d_msaa_t", "image") + .Case("image2d_msaa_depth_t", "image") + .Case("image3d_t", "image") + .Case("sampler_t", "sampler") + .Case("queue_t", "queue") + .Default(isa(Ty) + ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS + ? "dynamic_shared_pointer" + : "global_buffer") + : "by_value"); +} + +StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? "i8" : "u8"; + case 16: + return Signed ? "i16" : "u16"; + case 32: + return Signed ? "i32" : "u32"; + case 64: + return Signed ? "i64" : "u64"; + default: + return "struct"; + } + } + case Type::HalfTyID: + return "f16"; + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return "struct"; + } +} + +std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::shared_ptr +MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { + auto Dims = std::make_shared(); + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims->push_back(std::make_shared( + mdconst::extract(Op)->getZExtValue())); + return Dims; +} + +void MetadataStreamerV3::emitVersion() { + auto Version = std::make_shared(); + Version->push_back(std::make_shared(V3::VersionMajor)); + Version->push_back(std::make_shared(V3::VersionMinor)); + getRootMetadata("amdhsa.version") = std::move(Version); +} + +void MetadataStreamerV3::emitPrintf(const Module &Mod) { + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + auto Printf = std::make_shared(); + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf->push_back(std::make_shared( + cast(Op->getOperand(0))->getString())); + getRootMetadata("amdhsa.printf") = std::move(Printf); +} + +void MetadataStreamerV3::emitKernelLanguage(const Function &Func, + msgpack::MapNode &Kern) { + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kern[".language"] = std::make_shared("OpenCL C"); + auto LanguageVersion = std::make_shared(); + LanguageVersion->push_back(std::make_shared( + mdconst::extract(Op0->getOperand(0))->getZExtValue())); + LanguageVersion->push_back(std::make_shared( + mdconst::extract(Op0->getOperand(1))->getZExtValue())); + Kern[".language_version"] = std::move(LanguageVersion); +} + +void MetadataStreamerV3::emitKernelAttrs(const Function &Func, + msgpack::MapNode &Kern) { + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Kern[".vec_type_hint"] = std::make_shared(getTypeName( + cast(Node->getOperand(0))->getType(), + mdconst::extract(Node->getOperand(1))->getZExtValue())); + } + if (Func.hasFnAttribute("runtime-handle")) { + Kern[".device_enqueue_symbol"] = std::make_shared( + Func.getFnAttribute("runtime-handle").getValueAsString().str()); + } +} + +void MetadataStreamerV3::emitKernelArgs(const Function &Func, + msgpack::MapNode &Kern) { + unsigned Offset = 0; + auto Args = std::make_shared(); + for (auto &Arg : Func.args()) + emitKernelArg(Arg, Offset, *Args); + + emitHiddenKernelArgs(Func, Offset, *Args); + + // TODO: What about other languages? + if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args); + emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args); + emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args); + + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args); + else + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + + // Emit "default queue" and "completion action" arguments if enqueue kernel + // is used, otherwise emit dummy "none" arguments. + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args); + emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args); + } else { + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + } + } + + Kern[".args"] = std::move(Args); +} + +void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode &Args) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast(Node->getOperand(ArgNo))->getString(); + } + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast(Node->getOperand(ArgNo))->getString(); + + Type *Ty = Arg.getType(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + + unsigned PointeeAlign = 0; + if (auto PtrTy = dyn_cast(Ty)) { + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + PointeeAlign = Arg.getParamAlignment(); + if (PointeeAlign == 0) + PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); + } + } + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, + Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, + TypeQual); +} + +void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, + StringRef ValueKind, unsigned &Offset, + msgpack::ArrayNode &Args, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + auto ArgPtr = std::make_shared(); + auto &Arg = *ArgPtr; + + if (!Name.empty()) + Arg[".name"] = std::make_shared(Name); + if (!TypeName.empty()) + Arg[".type_name"] = std::make_shared(TypeName); + auto Size = DL.getTypeAllocSize(Ty); + auto Align = DL.getABITypeAlignment(Ty); + Arg[".size"] = std::make_shared(Size); + Offset = alignTo(Offset, Align); + Arg[".offset"] = std::make_shared(Offset); + Offset += Size; + Arg[".value_kind"] = std::make_shared(ValueKind); + Arg[".value_type"] = + std::make_shared(getValueType(Ty, BaseTypeName)); + if (PointeeAlign) + Arg[".pointee_align"] = std::make_shared(PointeeAlign); + + if (auto PtrTy = dyn_cast(Ty)) + if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) + Arg[".address_space"] = std::make_shared(*Qualifier); + + if (auto AQ = getAccessQualifier(AccQual)) + Arg[".access"] = std::make_shared(*AQ); + + // TODO: Emit Arg[".actual_access"]. + + SmallVector SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + if (Key == "const") + Arg[".is_const"] = std::make_shared(true); + else if (Key == "restrict") + Arg[".is_restrict"] = std::make_shared(true); + else if (Key == "volatile") + Arg[".is_volatile"] = std::make_shared(true); + else if (Key == "pipe") + Arg[".is_pipe"] = std::make_shared(true); + } + + Args.push_back(std::move(ArgPtr)); +} + +void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, + unsigned &Offset, + msgpack::ArrayNode &Args) { + int HiddenArgNumBytes = + getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + + if (!HiddenArgNumBytes) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + if (HiddenArgNumBytes >= 8) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); + if (HiddenArgNumBytes >= 16) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); + if (HiddenArgNumBytes >= 24) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); + + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (HiddenArgNumBytes >= 32) { + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); + else + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + + // Emit "default queue" and "completion action" arguments if enqueue kernel is + // used, otherwise emit dummy "none" arguments. + if (HiddenArgNumBytes >= 48) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); + } else { + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + } +} + +std::shared_ptr +MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const GCNSubtarget &STM = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + const Function &F = MF.getFunction(); + + auto HSAKernelProps = std::make_shared(); + auto &Kern = *HSAKernelProps; + + unsigned MaxKernArgAlign; + Kern[".kernarg_segment_size"] = std::make_shared( + STM.getKernArgSegmentSize(F, MaxKernArgAlign)); + Kern[".group_segment_fixed_size"] = + std::make_shared(ProgramInfo.LDSSize); + Kern[".private_segment_fixed_size"] = + std::make_shared(ProgramInfo.ScratchSize); + Kern[".kernarg_segment_align"] = + std::make_shared(std::max(uint32_t(4), MaxKernArgAlign)); + Kern[".wavefront_size"] = + std::make_shared(STM.getWavefrontSize()); + Kern[".sgpr_count"] = std::make_shared(ProgramInfo.NumSGPR); + Kern[".vgpr_count"] = std::make_shared(ProgramInfo.NumVGPR); + Kern[".max_flat_workgroup_size"] = + std::make_shared(MFI.getMaxFlatWorkGroupSize()); + Kern[".sgpr_spill_count"] = + std::make_shared(MFI.getNumSpilledSGPRs()); + Kern[".vgpr_spill_count"] = + std::make_shared(MFI.getNumSpilledVGPRs()); + + return HSAKernelProps; +} + +bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); +} + +void MetadataStreamerV3::begin(const Module &Mod) { + emitVersion(); + emitPrintf(Mod); + getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode()); +} + +void MetadataStreamerV3::end() { + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + if (DumpHSAMetadata) + dump(StrOS.str()); + if (VerifyHSAMetadata) + verify(StrOS.str()); +} + +void MetadataStreamerV3::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { + auto &Func = MF.getFunction(); + auto KernelProps = getHSAKernelProps(MF, ProgramInfo); + + assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || + Func.getCallingConv() == CallingConv::SPIR_KERNEL); + + auto &KernelsNode = getRootMetadata("amdhsa.kernels"); + auto Kernels = cast(KernelsNode.get()); + + { + auto &Kern = *KernelProps; + Kern[".name"] = std::make_shared(Func.getName()); + Kern[".symbol"] = std::make_shared( + (Twine(Func.getName()) + Twine(".kd")).str()); + emitKernelLanguage(Func, Kern); + emitKernelAttrs(Func, Kern); + emitKernelArgs(Func, Kern); + } + + Kernels->push_back(std::move(KernelProps)); +} + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm Index: lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- lib/Target/AMDGPU/AMDGPUPTNote.h +++ lib/Target/AMDGPU/AMDGPUPTNote.h @@ -23,7 +23,8 @@ const char SectionName[] = ".note"; -const char NoteName[] = "AMD"; +const char NoteNameV2[] = "AMD"; +const char NoteNameV3[] = "AMDGPU"; // TODO: Remove this file once we drop code object v2. enum NoteType{ Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3065,9 +3065,18 @@ } bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { + const char *AssemblerDirectiveBegin; + const char *AssemblerDirectiveEnd; + std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = + AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) + ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, + HSAMD::V3::AssemblerDirectiveEnd) + : std::make_tuple(HSAMD::AssemblerDirectiveBegin, + HSAMD::AssemblerDirectiveEnd); + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { return Error(getParser().getTok().getLoc(), - (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is " + (Twine(AssemblerDirectiveBegin) + Twine(" directive is " "not available on non-amdhsa OSes")).str()); } @@ -3085,7 +3094,7 @@ if (getLexer().is(AsmToken::Identifier)) { StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) { + if (ID == AssemblerDirectiveEnd) { Lex(); FoundEnd = true; break; @@ -3107,8 +3116,13 @@ YamlStream.flush(); - if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) - return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + if (IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } else { + if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } return false; } @@ -3145,6 +3159,10 @@ if (IDVal == ".amdhsa_kernel") return ParseDirectiveAMDHSAKernel(); + + // TODO: Restructure/combine with PAL metadata directive. + if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); } else { if (IDVal == ".hsa_code_object_version") return ParseDirectiveHSACodeObjectVersion(); @@ -3160,10 +3178,10 @@ if (IDVal == ".amd_amdgpu_isa") return ParseDirectiveISAVersion(); - } - if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) - return ParseDirectiveHSAMetadata(); + if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); + } if (IDVal == PALMD::AssemblerDirective) return ParseDirectivePALMetadata(); Index: lib/Target/AMDGPU/LLVMBuild.txt =================================================================== --- lib/Target/AMDGPU/LLVMBuild.txt +++ lib/Target/AMDGPU/LLVMBuild.txt @@ -30,5 +30,5 @@ type = Library name = AMDGPUCodeGen parent = AMDGPU -required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel +required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel BinaryFormat add_to_library_groups = AMDGPU Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #include "AMDKernelCodeT.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" @@ -52,7 +53,20 @@ virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; /// \returns True on success, false on failure. - virtual bool EmitHSAMetadata(StringRef HSAMetadataString); + virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString); + + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString); + + /// Emit HSA Metadata + /// + /// When \p Strict is true, known metadata elements must already be + /// well-typed. When \p Strict is false, known types are inferred and + /// the \p HSAMetadata structure is updated with the correct types. + /// + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadata(std::shared_ptr &HSAMetadata, + bool Strict) = 0; /// \returns True on success, false on failure. virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0; @@ -92,6 +106,10 @@ bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::shared_ptr &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. @@ -107,8 +125,8 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; - void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType, - function_ref EmitDesc); + void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, + function_ref EmitDesc); public: AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); @@ -132,6 +150,10 @@ bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::shared_ptr &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -17,7 +17,9 @@ #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -35,12 +37,13 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; //===----------------------------------------------------------------------===// // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { +bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) return false; @@ -48,6 +51,15 @@ return EmitHSAMetadata(HSAMetadata); } +bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { + std::shared_ptr HSAMetadataRoot; + yaml::Input YIn(HSAMetadataString); + YIn >> HSAMetadataRoot; + if (YIn.error()) + return false; + return EmitHSAMetadata(HSAMetadataRoot, false); +} + StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { AMDGPU::GPUKind AK; @@ -195,9 +207,26 @@ if (HSAMD::toString(HSAMetadata, HSAMetadataString)) return false; - OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n'; + OS << '\t' << AssemblerDirectiveBegin << '\n'; OS << HSAMetadataString << '\n'; - OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n'; + OS << '\t' << AssemblerDirectiveEnd << '\n'; + return true; +} + +bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( + std::shared_ptr &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(*HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + OS << '\t' << V3::AssemblerDirectiveBegin << '\n'; + OS << StrOS.str() << '\n'; + OS << '\t' << V3::AssemblerDirectiveEnd << '\n'; return true; } @@ -358,13 +387,13 @@ return static_cast(Streamer); } -void AMDGPUTargetELFStreamer::EmitAMDGPUNote( - const MCExpr *DescSZ, unsigned NoteType, +void AMDGPUTargetELFStreamer::EmitNote( + StringRef Name, const MCExpr *DescSZ, unsigned NoteType, function_ref EmitDesc) { auto &S = getStreamer(); auto &Context = S.getContext(); - auto NameSZ = sizeof(ElfNote::NoteName); + auto NameSZ = Name.size() + 1; S.PushSection(); S.SwitchSection(Context.getELFSection( @@ -372,7 +401,7 @@ S.EmitIntValue(NameSZ, 4); // namesz S.EmitValue(DescSZ, 4); // descz S.EmitIntValue(NoteType, 4); // type - S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name + S.EmitBytes(Name); // name S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 @@ -384,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( uint32_t Major, uint32_t Minor) { - EmitAMDGPUNote( - MCConstantExpr::create(8, getContext()), - ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, - [&](MCELFStreamer &OS){ - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - } - ); + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), + ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + }); } void @@ -407,21 +433,18 @@ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + VendorNameSize + ArchNameSize; - EmitAMDGPUNote( - MCConstantExpr::create(DescSZ, getContext()), - ElfNote::NT_AMDGPU_HSA_ISA, - [&](MCELFStreamer &OS) { - OS.EmitIntValue(VendorNameSize, 2); - OS.EmitIntValue(ArchNameSize, 2); - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - OS.EmitIntValue(Stepping, 4); - OS.EmitBytes(VendorName); - OS.EmitIntValue(0, 1); // NULL terminate VendorName - OS.EmitBytes(ArchName); - OS.EmitIntValue(0, 1); // NULL terminte ArchName - } - ); + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), + ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { + OS.EmitIntValue(VendorNameSize, 2); + OS.EmitIntValue(ArchNameSize, 2); + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + OS.EmitIntValue(Stepping, 4); + OS.EmitBytes(VendorName); + OS.EmitIntValue(0, 1); // NULL terminate VendorName + OS.EmitBytes(ArchName); + OS.EmitIntValue(0, 1); // NULL terminte ArchName + }); } void @@ -450,15 +473,41 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( - DescSZ, - ELF::NT_AMD_AMDGPU_ISA, - [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(IsaVersionString); - OS.EmitLabel(DescEnd); - } - ); + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(IsaVersionString); + OS.EmitLabel(DescEnd); + }); + return true; +} + +bool AMDGPUTargetELFStreamer::EmitHSAMetadata( + std::shared_ptr &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(*HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + msgpack::Writer MPWriter(StrOS); + HSAMetadataRoot->write(MPWriter); + + // Create two labels to mark the beginning and end of the desc field + // and a MCExpr to calculate the size of the desc field. + auto &Context = getContext(); + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(StrOS.str()); + OS.EmitLabel(DescEnd); + }); return true; } @@ -477,28 +526,24 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( - DescSZ, - ELF::NT_AMD_AMDGPU_HSA_METADATA, - [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(HSAMetadataString); - OS.EmitLabel(DescEnd); - } - ); + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(HSAMetadataString); + OS.EmitLabel(DescEnd); + }); return true; } bool AMDGPUTargetELFStreamer::EmitPALMetadata( const PALMD::Metadata &PALMetadata) { - EmitAMDGPUNote( - MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()), - ELF::NT_AMD_AMDGPU_PAL_METADATA, - [&](MCELFStreamer &OS){ - for (auto I : PALMetadata) - OS.EmitIntValue(I, sizeof(uint32_t)); - } - ); + EmitNote(ElfNote::NoteNameV2, + MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), + getContext()), + ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) { + for (auto I : PALMetadata) + OS.EmitIntValue(I, sizeof(uint32_t)); + }); return true; } Index: lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt +++ lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AMDGPUDesc parent = AMDGPU -required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support +required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support BinaryFormat add_to_library_groups = AMDGPU Index: test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -0,0 +1,145 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -mattr=+code-object-v3 -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +; CHECK-LABEL: {{^}}min_64_max_64: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_64_max_64() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-flat-work-group-size"="64,64"} + +; CHECK-LABEL: {{^}}min_64_max_128: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_64_max_128() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-flat-work-group-size"="64,128"} + +; CHECK-LABEL: {{^}}min_128_max_128: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_128_max_128() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} + +; CHECK-LABEL: {{^}}min_1024_max_2048 +; CHECK: SGPRBlocks: 1 +; CHECK: VGPRBlocks: 7 +; CHECK: NumSGPRsForWavesPerEU: 12 +; CHECK: NumVGPRsForWavesPerEU: 32 +@var = addrspace(1) global float 0.0 +define amdgpu_kernel void @min_1024_max_2048() #3 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + %val31 = load volatile float, float addrspace(1)* @var + %val32 = load volatile float, float addrspace(1)* @var + %val33 = load volatile float, float addrspace(1)* @var + %val34 = load volatile float, float addrspace(1)* @var + %val35 = load volatile float, float addrspace(1)* @var + %val36 = load volatile float, float addrspace(1)* @var + %val37 = load volatile float, float addrspace(1)* @var + %val38 = load volatile float, float addrspace(1)* @var + %val39 = load volatile float, float addrspace(1)* @var + %val40 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + store volatile float %val31, float addrspace(1)* @var + store volatile float %val32, float addrspace(1)* @var + store volatile float %val33, float addrspace(1)* @var + store volatile float %val34, float addrspace(1)* @var + store volatile float %val35, float addrspace(1)* @var + store volatile float %val36, float addrspace(1)* @var + store volatile float %val37, float addrspace(1)* @var + store volatile float %val38, float addrspace(1)* @var + store volatile float %val39, float addrspace(1)* @var + store volatile float %val40, float addrspace(1)* @var + + ret void +} +attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} + +; CHECK: amdhsa.kernels: +; CHECK: .max_flat_workgroup_size: 64 +; CHECK: .name: min_64_max_64 +; CHECK: .max_flat_workgroup_size: 128 +; CHECK: .name: min_64_max_128 +; CHECK: .max_flat_workgroup_size: 128 +; CHECK: .name: min_128_max_128 +; CHECK: .max_flat_workgroup_size: 2048 +; CHECK: .name: min_1024_max_2048 +; CHECK: amdhsa.version: +; CHECK: - 1 +; CHECK: - 0 + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/code-object-v3.ll =================================================================== --- test/CodeGen/AMDGPU/code-object-v3.ll +++ test/CodeGen/AMDGPU/code-object-v3.ll @@ -3,6 +3,8 @@ ; ALL-ASM-LABEL: {{^}}fadd: +; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_version +; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_isa ; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel ; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t @@ -57,7 +59,8 @@ ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fadd.kd ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000040 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fsub.kd -; OSABI-AMDHSA-ELF-NOT: Displaying notes found +; OSABI-AMDHSA-ELF: Displaying notes found at file offset +; OSABI-AMDHSA-ELF: AMDGPU 0x{{[0-9a-f]+}} NT_AMDGPU_METADATA (AMDGPU Metadata) define amdgpu_kernel void @fadd( float addrspace(1)* %r, Index: test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll @@ -0,0 +1,33 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; CHECK: .symbol: test_ro_arg.kd +; CHECK: .name: test_ro_arg +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'float*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: in +; CHECK-NEXT: .access: read_only +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .is_const: true +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_restrict: true +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'float*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: out +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .address_space: global + +define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out) + !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 + !kernel_arg_base_type !2 !kernel_arg_type_qual !3 { + ret void +} + +!0 = !{i32 1, i32 1} +!1 = !{!"none", !"none"} +!2 = !{!"float*", !"float*"} +!3 = !{!"const restrict", !""} Index: test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll @@ -0,0 +1,101 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test_non_enqueue_kernel_caller.kd +; CHECK: .name: test_non_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NOT: .value_kind: hidden_default_queue +; CHECK-NOT: .value_kind: hidden_completion_action +define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_enqueue_kernel_caller.kd +; CHECK: .name: test_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_default_queue +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_completion_action +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK-NOT: amdhsa.printf: + +attributes #0 = { "calls-enqueue-kernel" } + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"char"} +!4 = !{!""} + +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0} + + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -0,0 +1,1453 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +%struct.A = type { i8, float } +%opencl.image1d_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque +%opencl.queue_t = type opaque +%opencl.pipe_t = type opaque +%struct.B = type { i32 addrspace(1)*} +%opencl.clk_event_t = type opaque + +@__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test_char.kd +; CHECK: .name: test_char +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NOT: .value_kind: hidden_default_queue +; CHECK-NOT: .value_kind: hidden_completion_action +define amdgpu_kernel void @test_char(i8 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 + !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_ushort2.kd +; CHECK: .name: test_ushort2 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: ushort2 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: u16 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_ushort2(<2 x i16> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 + !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_int3.kd +; CHECK: .name: test_int3 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int3 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_int3(<3 x i32> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 + !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_ulong4.kd +; CHECK: .name: test_ulong4 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: ulong4 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 32 +; CHECK-NEXT: .value_type: u64 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_ulong4(<4 x i64> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 + !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_half8.kd +; CHECK: .name: test_half8 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: half8 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_half8(<8 x half> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 + !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_float16.kd +; CHECK: .name: test_float16 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: float16 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 64 +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 88 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_float16(<16 x float> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 + !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_double16.kd +; CHECK: .name: test_double16 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: double16 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 128 +; CHECK-NEXT: .value_type: f64 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 128 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 136 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 144 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 152 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_double16(<16 x double> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 + !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_pointer.kd +; CHECK: .name: test_pointer +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 + !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_image.kd +; CHECK: .name: test_image +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: image2d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 + !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_sampler.kd +; CHECK: .name: test_sampler +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: sampler_t +; CHECK-NEXT: .value_kind: sampler +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_sampler(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 + !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_queue.kd +; CHECK: .name: test_queue +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: queue_t +; CHECK-NEXT: .value_kind: queue +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 + !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_struct.kd +; CHECK: .name: test_struct +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: struct A +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: private +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 + !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_i128.kd +; CHECK: .name: test_i128 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: i128 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_i128(i128 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 + !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_multi_arg.kd +; CHECK: .name: test_multi_arg +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .type_name: short2 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 4 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i16 +; CHECK-NEXT: .name: b +; CHECK-NEXT: - .type_name: char3 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: c +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 + !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_addr_space.kd +; CHECK: .name: test_addr_space +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: g +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: constant +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: l +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, + i32 addrspace(4)* %c, + i32 addrspace(3)* %l) + !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_type_qual.kd +; CHECK: .name: test_type_qual +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_volatile: true +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .is_const: true +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_restrict: true +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: pipe +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .is_pipe: true +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, + i32 addrspace(1)* %b, + %opencl.pipe_t addrspace(1)* %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { + ret void +} + +; CHECK: .symbol: test_access_qual.kd +; CHECK: .name: test_access_qual +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: image1d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: ro +; CHECK-NEXT: .access: read_only +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: image2d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: wo +; CHECK-NEXT: .access: write_only +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: image3d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: rw +; CHECK-NEXT: .access: read_write +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, + %opencl.image2d_t addrspace(1)* %wo, + %opencl.image3d_t addrspace(1)* %rw) + !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 + !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_half.kd +; CHECK: .name: test_vec_type_hint_half +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: half +define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_float.kd +; CHECK: .name: test_vec_type_hint_float +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: float +define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_double.kd +; CHECK: .name: test_vec_type_hint_double +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: double +define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_char.kd +; CHECK: .name: test_vec_type_hint_char +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: char +define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_short.kd +; CHECK: .name: test_vec_type_hint_short +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: short +define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_long.kd +; CHECK: .name: test_vec_type_hint_long +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: long +define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_unknown.kd +; CHECK: .name: test_vec_type_hint_unknown +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: unknown +define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { + ret void +} + +; CHECK: .reqd_workgroup_size: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 4 +; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd +; CHECK: .name: test_reqd_wgs_vec_type_hint +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: int +define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 + !reqd_work_group_size !6 { + ret void +} + +; CHECK: .symbol: test_wgs_hint_vec_type_hint.kd +; CHECK: .workgroup_size_hint: +; CHECK-NEXT: - 8 +; CHECK-NEXT: - 16 +; CHECK-NEXT: - 32 +; CHECK: .name: test_wgs_hint_vec_type_hint +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: uint4 +define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 + !work_group_size_hint !8 { + ret void +} + +; CHECK: .symbol: test_arg_ptr_to_ptr.kd +; CHECK: .name: test_arg_ptr_to_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)* addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 + !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_struct_contains_ptr.kd +; CHECK: .name: test_arg_struct_contains_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: struct B +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: private +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 + !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_vector_of_ptr.kd +; CHECK: .name: test_arg_vector_of_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 + !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_unknown_builtin_type.kd +; CHECK: .name: test_arg_unknown_builtin_type +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: clk_event_t +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_unknown_builtin_type( + %opencl.clk_event_t addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 + !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_pointee_align.kd +; CHECK: .name: test_pointee_align +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'long addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'char addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 1 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char2 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 12 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 2 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char3 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: d +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char4 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: e +; CHECK-NEXT: .offset: 20 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char8 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: f +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 8 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char16 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: g +; CHECK-NEXT: .offset: 28 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 16 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, + i8 addrspace(3)* %b, + <2 x i8> addrspace(3)* %c, + <3 x i8> addrspace(3)* %d, + <4 x i8> addrspace(3)* %e, + <8 x i8> addrspace(3)* %f, + <16 x i8> addrspace(3)* %g) + !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 + !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} + +; CHECK: .symbol: __test_block_invoke_kernel.kd +; CHECK: .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle +; CHECK: .name: __test_block_invoke_kernel +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: __block_literal +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 25 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .name: arg +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @__test_block_invoke_kernel( + <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 + !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_enqueue_kernel_caller.kd +; CHECK: .name: test_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_default_queue +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_completion_action +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 + !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: unknown_addrspace_kernarg.kd +; CHECK: .name: unknown_addrspace_kernarg +; CHECK: .args: +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: ptr +define amdgpu_kernel void @unknown_addrspace_kernarg(i32 addrspace(12345)* %ptr) #0 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: amdhsa.printf: +; CHECK-NEXT: - '1:1:4:%d\n' +; CHECK-NEXT: - '2:1:8:%g\n' + +attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #1 = { "calls-enqueue-kernel" } + +!llvm.printf.fmts = !{!100, !101} + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"int"} +!4 = !{!""} +!5 = !{i32 undef, i32 1} +!6 = !{i32 1, i32 2, i32 4} +!7 = !{<4 x i32> undef, i32 0} +!8 = !{i32 8, i32 16, i32 32} +!9 = !{!"char"} +!10 = !{!"ushort2"} +!11 = !{!"int3"} +!12 = !{!"ulong4"} +!13 = !{!"half8"} +!14 = !{!"float16"} +!15 = !{!"double16"} +!16 = !{!"int addrspace(5)*"} +!17 = !{!"image2d_t"} +!18 = !{!"sampler_t"} +!19 = !{!"queue_t"} +!20 = !{!"struct A"} +!21 = !{!"i128"} +!22 = !{i32 0, i32 0, i32 0} +!23 = !{!"none", !"none", !"none"} +!24 = !{!"int", !"short2", !"char3"} +!25 = !{!"", !"", !""} +!26 = !{half undef, i32 1} +!27 = !{float undef, i32 1} +!28 = !{double undef, i32 1} +!29 = !{i8 undef, i32 1} +!30 = !{i16 undef, i32 1} +!31 = !{i64 undef, i32 1} +!32 = !{i32 addrspace(5)*undef, i32 1} +!50 = !{i32 1, i32 2, i32 3} +!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"} +!60 = !{i32 1, i32 1, i32 1} +!61 = !{!"read_only", !"write_only", !"read_write"} +!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} +!70 = !{!"volatile", !"const restrict", !"pipe"} +!80 = !{!"int addrspace(5)* addrspace(5)*"} +!81 = !{i32 1} +!82 = !{!"struct B"} +!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"} +!84 = !{!"clk_event_t"} +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0} +!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} +!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} +!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"} +!94 = !{!"", !"", !"", !"", !"", !"", !""} +!100 = !{!"1:1:4:%d\5Cn"} +!101 = !{!"2:1:8:%g\5Cn"} +!110 = !{!"__block_literal"} + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -0,0 +1,72 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .args: +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +!opencl.ocl.version = !{!0} +!0 = !{i32 2, i32 0} Index: test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll @@ -0,0 +1,95 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +%opencl.image1d_t = type opaque +%opencl.image1d_array_t = type opaque +%opencl.image1d_buffer_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image2d_array_t = type opaque +%opencl.image2d_array_depth_t = type opaque +%opencl.image2d_array_msaa_t = type opaque +%opencl.image2d_array_msaa_depth_t = type opaque +%opencl.image2d_depth_t = type opaque +%opencl.image2d_msaa_t = type opaque +%opencl.image2d_msaa_depth_t = type opaque +%opencl.image3d_t = type opaque + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .args: +; CHECK: - .type_name: image1d_t +; CHECK: .value_kind: image +; CHECK: .name: a +; CHECK: .size: 8 +; CHECK: - .type_name: image1d_array_t +; CHECK: .value_kind: image +; CHECK: .name: b +; CHECK: .size: 8 +; CHECK: - .type_name: image1d_buffer_t +; CHECK: .value_kind: image +; CHECK: .name: c +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_t +; CHECK: .value_kind: image +; CHECK: .name: d +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_t +; CHECK: .value_kind: image +; CHECK: .name: e +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_depth_t +; CHECK: .value_kind: image +; CHECK: .name: f +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_msaa_t +; CHECK: .value_kind: image +; CHECK: .name: g +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_msaa_depth_t +; CHECK: .value_kind: image +; CHECK: .name: h +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_depth_t +; CHECK: .value_kind: image +; CHECK: .name: i +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_msaa_t +; CHECK: .value_kind: image +; CHECK: .name: j +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_msaa_depth_t +; CHECK: .value_kind: image +; CHECK: .name: k +; CHECK: .size: 8 +; CHECK: - .type_name: image3d_t +; CHECK: .value_kind: image +; CHECK: .name: l +; CHECK: .size: 8 +define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, + %opencl.image1d_array_t addrspace(1)* %b, + %opencl.image1d_buffer_t addrspace(1)* %c, + %opencl.image2d_t addrspace(1)* %d, + %opencl.image2d_array_t addrspace(1)* %e, + %opencl.image2d_array_depth_t addrspace(1)* %f, + %opencl.image2d_array_msaa_t addrspace(1)* %g, + %opencl.image2d_array_msaa_depth_t addrspace(1)* %h, + %opencl.image2d_depth_t addrspace(1)* %i, + %opencl.image2d_msaa_t addrspace(1)* %j, + %opencl.image2d_msaa_depth_t addrspace(1)* %k, + %opencl.image3d_t addrspace(1)* %l) + !kernel_arg_type !1 !kernel_arg_base_type !1 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", + !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", + !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", + !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t", + !"image3d_t"} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll @@ -0,0 +1,11 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll @@ -0,0 +1,12 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{!0} +!0 = !{} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll @@ -0,0 +1,12 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{!0} +!0 = !{i32 1} Index: test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -0,0 +1,146 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +@var = addrspace(1) global float 0.0 + +; CHECK: --- +; CHECK: amdhsa.kernels: + +; CHECK: - .max_flat_workgroup_size: 256 +; CHECK: .kernarg_segment_size: 24 +; CHECK: .private_segment_fixed_size: 0 +; CHECK: .wavefront_size: 64 +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .sgpr_count: 8 +; CHECK: .kernarg_segment_align: 8 +; CHECK: .vgpr_count: 6 +; CHECK: .group_segment_fixed_size: 0 +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: .symbol: num_spilled_sgprs.kd +; CHECK: .name: num_spilled_sgprs +; GFX700: .sgpr_spill_count: 40 +; GFX803: .sgpr_spill_count: 24 +; GFX900: .sgpr_spill_count: 24 +define amdgpu_kernel void @num_spilled_sgprs( + i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32], + i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, [8 x i32], + i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, [8 x i32], + i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, [8 x i32], + i32 addrspace(1)* %out8, i32 addrspace(1)* %out9, [8 x i32], + i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, [8 x i32], + i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, [8 x i32], + i32 addrspace(1)* %oute, i32 addrspace(1)* %outf, [8 x i32], + i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32], + i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32], + i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32], + i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { +entry: + store i32 %in0, i32 addrspace(1)* %out0 + store i32 %in1, i32 addrspace(1)* %out1 + store i32 %in2, i32 addrspace(1)* %out2 + store i32 %in3, i32 addrspace(1)* %out3 + store i32 %in4, i32 addrspace(1)* %out4 + store i32 %in5, i32 addrspace(1)* %out5 + store i32 %in6, i32 addrspace(1)* %out6 + store i32 %in7, i32 addrspace(1)* %out7 + store i32 %in8, i32 addrspace(1)* %out8 + store i32 %in9, i32 addrspace(1)* %out9 + store i32 %ina, i32 addrspace(1)* %outa + store i32 %inb, i32 addrspace(1)* %outb + store i32 %inc, i32 addrspace(1)* %outc + store i32 %ind, i32 addrspace(1)* %outd + store i32 %ine, i32 addrspace(1)* %oute + store i32 %inf, i32 addrspace(1)* %outf + ret void +} + +; CHECK: .symbol: num_spilled_vgprs.kd +; CHECK: .name: num_spilled_vgprs +; CHECK: .vgpr_spill_count: 14 +define amdgpu_kernel void @num_spilled_vgprs() #1 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +attributes #0 = { "amdgpu-num-sgpr"="14" } +attributes #1 = { "amdgpu-num-vgpr"="20" } Index: test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s @@ -0,0 +1,96 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_metadata +// CHECK: amdhsa.kernels: +// CHECK-NEXT: - .max_flat_workgroup_size: 256 +// CHECK-NEXT: .wavefront_size: 128 +// CHECK-NEXT: .symbol: 'test_kernel@kd' +// CHECK-NEXT: .kernarg_segment_size: 8 +// CHECK-NEXT: .private_segment_fixed_size: 32 +// CHECK-NEXT: .name: test_kernel +// CHECK-NEXT: .language: OpenCL C +// CHECK-NEXT: .sgpr_count: 14 +// CHECK-NEXT: .kernarg_segment_align: 64 +// CHECK-NEXT: .vgpr_count: 40 +// CHECK-NEXT: .group_segment_fixed_size: 16 +// CHECK-NEXT: .language_version: +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 0 +// CHECK-NEXT: .args: +// CHECK-NEXT: - .type_name: char +// CHECK-NEXT: .value_kind: by_value +// CHECK-NEXT: .offset: 1 +// CHECK-NEXT: .size: 1 +// CHECK-NEXT: .value_type: i8 +// CHECK-NEXT: - .value_kind: hidden_global_offset_x +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_global_offset_y +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_global_offset_z +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_printf_buffer +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i8 +// CHECK-NEXT: .address_space: global +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +// CHECK: amdhsa.printf: +// CHECK-NEXT: - '1:1:4:%d\n' +// CHECK-NEXt: - '2:1:8:%g\n' +// CHECK: .end_amdgpu_metadata +.amdgpu_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .language: OpenCL C + .language_version: + - 2 + - 0 + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + .args: + - .type_name: char + .size: 1 + .offset: 1 + .value_kind: by_value + .value_type: i8 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_x + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_y + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_z + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_printf_buffer + .value_type: i8 + .address_space: global +.end_amdgpu_metadata Index: test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s @@ -0,0 +1,67 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_metadata +// CHECK: amdhsa.kernels: +// CHECK: - .max_flat_workgroup_size: 256 +// CHECK: .wavefront_size: 128 +// CHECK: .symbol: 'test_kernel@kd' +// CHECK: .reqd_workgroup_size: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 4 +// CHECK: .kernarg_segment_size: 8 +// CHECK: .private_segment_fixed_size: 32 +// CHECK: .workgroup_size_hint: +// CHECK-NEXT: - 8 +// CHECK-NEXT: - 16 +// CHECK-NEXT: - 32 +// CHECK: .name: test_kernel +// CHECK: .language: OpenCL C +// CHECK: .sgpr_count: 14 +// CHECK: .kernarg_segment_align: 64 +// CHECK: .vgpr_count: 40 +// CHECK: .language_version: +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 0 +// CHECK: .vec_type_hint: int +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +// CHECK: amdhsa.printf: +// CHECK: - '1:1:4:%d\n' +// CHECK: - '2:1:8:%g\n' +// CHECK: .end_amdgpu_metadata +.amdgpu_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .language: OpenCL C + .language_version: + - 2 + - 0 + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + .reqd_workgroup_size: + - 1 + - 2 + - 4 + .workgroup_size_hint: + - 8 + - 16 + - 32 + .vec_type_hint: int +.end_amdgpu_metadata Index: test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amdgpu_metadata +// CHECK: amdhsa.kernels: +// CHECK: - .sgpr_count: 40 +// CHECK: .max_flat_workgroup_size: 256 +// CHECK: .symbol: 'test_kernel@kd' +// CHECK: .kernarg_segment_size: 24 +// CHECK: .group_segment_fixed_size: 24 +// CHECK: .private_segment_fixed_size: 16 +// CHECK: .vgpr_count: 14 +// CHECK: .vgpr_spill_count: 1 +// CHECK: .kernarg_segment_align: 16 +// CHECK: .sgpr_spill_count: 1 +// CHECK: .wavefront_size: 64 +// CHECK: .name: test_kernel +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +.amdgpu_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .kernarg_segment_size: 24 + .group_segment_fixed_size: 24 + .private_segment_fixed_size: 16 + .kernarg_segment_align: 16 + .wavefront_size: 64 + .max_flat_workgroup_size: 256 + .sgpr_count: 40 + .vgpr_count: 14 + .sgpr_spill_count: 1 + .vgpr_spill_count: 1 +.end_amdgpu_metadata Index: test/MC/AMDGPU/hsa-v3.s =================================================================== --- test/MC/AMDGPU/hsa-v3.s +++ test/MC/AMDGPU/hsa-v3.s @@ -213,3 +213,59 @@ // ASM: .byte 17 .byte .amdgcn.next_free_sgpr // ASM: .byte 4 + +// Metadata + +.amdgpu_metadata + amdhsa.version: + - 3 + - 0 + amdhsa.kernels: + - .name: amd_kernel_code_t_test_all + .symbol: amd_kernel_code_t_test_all@kd + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + - .name: amd_kernel_code_t_minimal + .symbol: amd_kernel_code_t_minimal@kd + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 +.end_amdgpu_metadata + +// ASM: .amdgpu_metadata +// ASM: amdhsa.kernels: +// ASM: - .sgpr_count: 14 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .symbol: 'amd_kernel_code_t_test_all@kd' +// ASM: .kernarg_segment_size: 8 +// ASM: .group_segment_fixed_size: 16 +// ASM: .private_segment_fixed_size: 32 +// ASM: .vgpr_count: 40 +// ASM: .kernarg_segment_align: 64 +// ASM: .wavefront_size: 128 +// ASM: .name: amd_kernel_code_t_test_all +// ASM: - .sgpr_count: 14 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .symbol: 'amd_kernel_code_t_minimal@kd' +// ASM: .kernarg_segment_size: 8 +// ASM: .group_segment_fixed_size: 16 +// ASM: .private_segment_fixed_size: 32 +// ASM: .vgpr_count: 40 +// ASM: .kernarg_segment_align: 64 +// ASM: .wavefront_size: 128 +// ASM: .name: amd_kernel_code_t_minimal +// ASM: amdhsa.version: +// ASM-NEXT: - 3 +// ASM-NEXT: - 0 +// ASM: .end_amdgpu_metadata Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" @@ -3631,7 +3632,7 @@ return OS.str(); } -static std::string getAMDGPUNoteTypeName(const uint32_t NT) { +static std::string getAMDNoteTypeName(const uint32_t NT) { static const struct { uint32_t ID; const char *Name; @@ -3654,6 +3655,16 @@ return OS.str(); } +static std::string getAMDGPUNoteTypeName(const uint32_t NT) { + if (NT == ELF::NT_AMDGPU_METADATA) + return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)"); + + std::string string; + raw_string_ostream OS(string); + OS << format("Unknown note type (0x%08x)", NT); + return OS.str(); +} + template static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, ArrayRef Data) { @@ -3811,14 +3822,13 @@ OS << '\n'; } -struct AMDGPUNote { - std::string type; - std::string value; +struct AMDNote { + std::string Type; + std::string Value; }; template -static AMDGPUNote getAMDGPUNote(uint32_t NoteType, - ArrayRef Desc) { +static AMDNote getAMDNote(uint32_t NoteType, ArrayRef Desc) { switch (NoteType) { default: return {"", ""}; @@ -3844,6 +3854,41 @@ } } +struct AMDGPUNote { + std::string Type; + std::string Value; +}; + +template +static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Words) { + switch (NoteType) { + default: + return {"", ""}; + case ELF::NT_AMDGPU_METADATA: + auto MsgPackString = + StringRef(reinterpret_cast(Words.data()), Words.size()); + msgpack::Reader MsgPackReader(MsgPackString); + auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader); + if (errorToBool(OptMsgPackNodeOrErr.takeError())) + return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; + auto &OptMsgPackNode = *OptMsgPackNodeOrErr; + if (!OptMsgPackNode) + return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; + auto &MsgPackNode = *OptMsgPackNode; + + AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); + if (!Verifier.verify(*MsgPackNode)) + return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << MsgPackNode; + + return {"AMDGPU Metadata", StrOS.str()}; + } +} + template void GNUStyle::printNotes(const ELFFile *Obj) { const Elf_Ehdr *e = Obj->getHeader(); @@ -3870,10 +3915,15 @@ } else if (Name == "FreeBSD") { OS << getFreeBSDNoteTypeName(Type) << '\n'; } else if (Name == "AMD") { + OS << getAMDNoteTypeName(Type) << '\n'; + const AMDNote N = getAMDNote(Type, Descriptor); + if (!N.Type.empty()) + OS << " " << N.Type << ":\n " << N.Value << '\n'; + } else if (Name == "AMDGPU") { OS << getAMDGPUNoteTypeName(Type) << '\n'; const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); - if (!N.type.empty()) - OS << " " << N.type << ":\n " << N.value << '\n'; + if (!N.Type.empty()) + OS << " " << N.Type << ":\n " << N.Value << '\n'; } else { OS << "Unknown note type: (" << format_hex(Type, 10) << ')'; } @@ -4536,10 +4586,15 @@ } else if (Name == "FreeBSD") { W.printString("Type", getFreeBSDNoteTypeName(Type)); } else if (Name == "AMD") { + W.printString("Type", getAMDNoteTypeName(Type)); + const AMDNote N = getAMDNote(Type, Descriptor); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); + } else if (Name == "AMDGPU") { W.printString("Type", getAMDGPUNoteTypeName(Type)); const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); - if (!N.type.empty()) - W.printString(N.type, N.value); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); } else { W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')'; }