Index: include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h =================================================================== --- /dev/null +++ include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h @@ -0,0 +1,70 @@ +//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is a verifier for AMDGPU HSA metadata, which can verify both +/// well-typed metadata and untyped metadata. When verifying in the non-strict +/// mode, untyped metadata is coerced into the correct type if possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H +#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H + +#include "llvm/BinaryFormat/MsgPackTypes.h" + +namespace llvm { +namespace AMDGPU { +namespace HSAMD { +namespace V3 { + +/// Verifier for AMDGPU HSA metadata. +/// +/// Operates in two modes: +/// +/// In strict mode, metadata must already be well-typed. +/// +/// In non-strict mode, metadata is coerced into expected types when possible. +class MetadataVerifier { + bool Strict; + + bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue = {}); + bool verifyInteger(msgpack::Node &Node); + bool verifyArray(msgpack::Node &Node, + function_ref verifyNode, + Optional Size = None); + bool verifyEntry(msgpack::MapNode<> &MapNode, StringRef Key, bool Required, + function_ref verifyNode); + bool + verifyScalarEntry(msgpack::MapNode<> &MapNode, StringRef Key, bool Required, + msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue = {}); + bool verifyIntegerEntry(msgpack::MapNode<> &MapNode, StringRef Key, + bool Required); + bool verifyKernelArgs(msgpack::Node &Node); + bool verifyKernel(msgpack::Node &Node); + +public: + /// Construct a MetadataVerifier, specifying whether it will operate in \p + /// Strict mode. + MetadataVerifier(bool Strict) : Strict(Strict) {} + + /// Verify given HSA metadata. + /// + /// \returns True when successful, false when metadata is invalid. + bool verify(std::unique_ptr &HSAMetadataRoot); +}; + +} // end namespace V3 +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H Index: include/llvm/BinaryFormat/ELF.h =================================================================== --- include/llvm/BinaryFormat/ELF.h +++ include/llvm/BinaryFormat/ELF.h @@ -1318,7 +1318,7 @@ GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1 }; -// AMDGPU specific notes. +// AMD specific notes. (Code Object V2) enum { // Note types with values between 0 and 9 (inclusive) are reserved. NT_AMD_AMDGPU_HSA_METADATA = 10, @@ -1326,6 +1326,12 @@ NT_AMD_AMDGPU_PAL_METADATA = 12 }; +// AMDGPU specific notes. (Code Object V3) +enum { + // Note type value 0 is reserved. + NT_AMDGPU_METADATA = 1 +}; + enum { GNU_ABI_TAG_LINUX = 0, GNU_ABI_TAG_HURD = 1, Index: include/llvm/Support/AMDGPUMetadata.h =================================================================== --- include/llvm/Support/AMDGPUMetadata.h +++ include/llvm/Support/AMDGPUMetadata.h @@ -431,6 +431,133 @@ /// Converts \p HSAMetadata to \p String. std::error_code toString(Metadata HSAMetadata, std::string &String); +//===----------------------------------------------------------------------===// +// HSA metadata for v3 code object. +//===----------------------------------------------------------------------===// +namespace V3 { + +/// HSA metadata major version. +constexpr uint32_t VersionMajor = 1; +/// HSA metadata minor version. +constexpr uint32_t VersionMinor = 0; + +//===----------------------------------------------------------------------===// +// Kernel Metadata. +//===----------------------------------------------------------------------===// +namespace Kernel { + +//===----------------------------------------------------------------------===// +// Kernel Argument Metadata. +//===----------------------------------------------------------------------===// +namespace Arg { + +namespace Key { +constexpr char Name[] = ".name"; +constexpr char TypeName[] = ".type_name"; +constexpr char Size[] = ".size"; +constexpr char Offset[] = ".offset"; +constexpr char ValueKind[] = ".value_kind"; +constexpr char ValueType[] = ".value_type"; +constexpr char PointeeAlign[] = ".pointee_align"; +constexpr char AddressSpace[] = ".address_space"; +constexpr char Access[] = ".access"; +constexpr char ActualAccess[] = ".actual_access"; +constexpr char IsConst[] = ".is_const"; +constexpr char IsRestrict[] = ".is_restrict"; +constexpr char IsVolatile[] = ".is_volatile"; +constexpr char IsPipe[] = ".is_pipe"; +} // end namespace Key + +namespace Access { +constexpr char ReadOnly[] = "read_only"; +constexpr char WriteOnly[] = "write_only"; +constexpr char ReadWrite[] = "read_write"; +} // end namespace Access + +namespace AddressSpace { +constexpr char Private[] = "private"; +constexpr char Global[] = "global"; +constexpr char Constant[] = "constant"; +constexpr char Local[] = "local"; +constexpr char Generic[] = "generic"; +constexpr char Region[] = "region"; +} // end namespace AddressSpace + +namespace ValueKind { +constexpr char ByValue[] = "by_value"; +constexpr char GlobalBuffer[] = "global_buffer"; +constexpr char DynamicSharedPointer[] = "dynamic_shared_pointer"; +constexpr char Sampler[] = "sampler"; +constexpr char Image[] = "image"; +constexpr char Pipe[] = "pipe"; +constexpr char Queue[] = "queue"; +constexpr char HiddenGlobalOffsetX[] = "hidden_global_offset_x"; +constexpr char HiddenGlobalOffsetY[] = "hidden_global_offset_y"; +constexpr char HiddenGlobalOffsetZ[] = "hidden_global_offset_z"; +constexpr char HiddenNone[] = "hidden_none"; +constexpr char HiddenPrintfBuffer[] = "hidden_printf_buffer"; +constexpr char HiddenDefaultQueue[] = "hidden_default_queue"; +constexpr char HiddenCompletionAction[] = "hidden_completion_action"; +} // end namespace ValueKind + +namespace ValueType { +constexpr char Struct[] = "struct"; +constexpr char I8[] = "i8"; +constexpr char U8[] = "u8"; +constexpr char I16[] = "i16"; +constexpr char U16[] = "u16"; +constexpr char F16[] = "f16"; +constexpr char I32[] = "i32"; +constexpr char U32[] = "u32"; +constexpr char F32[] = "f32"; +constexpr char I64[] = "i64"; +constexpr char U64[] = "u64"; +constexpr char F64[] = "f64"; +} // end namespace ValueType + +} // end namespace Arg + +namespace Key { +constexpr char Name[] = ".name"; +constexpr char Symbol[] = ".symbol"; +constexpr char Language[] = ".language"; +constexpr char LanguageVersion[] = ".language_version"; +constexpr char Args[] = ".args"; +constexpr char ReqdWorkGroupSize[] = ".reqd_workgroup_size"; +constexpr char WorkGroupSizeHint[] = ".workgroup_size_hint"; +constexpr char VecTypeHint[] = ".vec_type_hint"; +constexpr char DeviceEnqueueSymbol[] = ".device_enqueue_symbol"; +constexpr char KernargSegmentSize[] = ".kernarg_segment_size"; +constexpr char GroupSegmentFixedSize[] = ".group_segment_fixed_size"; +constexpr char PrivateSegmentFixedSize[] = ".private_segment_fixed_size"; +constexpr char KernargSegmentAlign[] = ".kernarg_segment_align"; +constexpr char WavefrontSize[] = ".wavefront_size"; +constexpr char SGPRCount[] = ".sgpr_count"; +constexpr char VGPRCount[] = ".vgpr_count"; +constexpr char MaxFlatWorkGroupSize[] = ".max_flat_workgroup_size"; +constexpr char SGPRSpillCount[] = ".sgpr_spill_count"; +constexpr char VGPRSpillCount[] = ".vgpr_spill_count"; +} // end namespace Key + +namespace Language { +constexpr char OpenCLC[] = "OpenCL C"; +constexpr char OpenCLCXX[] = "OpenCL C++"; +constexpr char HCC[] = "HCC"; +constexpr char HIP[] = "HIP"; +constexpr char OpenMP[] = "OpenMP"; +constexpr char Assembler[] = "Assembler"; +} // end namespace Language + +} // end namespace Kernel + +namespace Key { +constexpr char Version[] = "amdhsa.version"; +constexpr char Printf[] = "amdhsa.printf"; +constexpr char Kernels[] = "amdhsa.kernels"; +} // end namespace Key + +} // end namespace V3 + } // end namespace HSAMD //===----------------------------------------------------------------------===// Index: lib/BinaryFormat/AMDGPUMetadataVerifier.cpp =================================================================== --- /dev/null +++ lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -0,0 +1,325 @@ +//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Implements a verifier for AMDGPU HSA metadata. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" +#include "llvm/Support/AMDGPUMetadata.h" + +namespace llvm { +namespace AMDGPU { +namespace HSAMD { +namespace V3 { + +bool MetadataVerifier::verifyScalar( + msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue) { + auto ScalarPtr = dyn_cast(&Node); + if (!ScalarPtr) + return false; + auto &Scalar = *ScalarPtr; + // Do not output extraneous tags for types we know from the spec. + Scalar.IgnoreTag = true; + if (Scalar.getScalarKind() != SKind) { + if (Strict) + return false; + // If we are not strict, we interpret string values as "implicitly typed" + // and attempt to coerce them to the expected type here. + if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String) + return false; + std::string StringValue = Scalar.getString(); + Scalar.setScalarKind(SKind); + if (Scalar.inputYAML(StringValue) != StringRef()) + return false; + } + if (verifyValue) + return verifyValue(Scalar); + return true; +} + +bool MetadataVerifier::verifyInteger(msgpack::Node &Node) { + if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt)) + if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int)) + return false; + return true; +} + +bool MetadataVerifier::verifyArray( + msgpack::Node &Node, function_ref verifyNode, + Optional Size) { + auto ArrayPtr = dyn_cast>(&Node); + if (!ArrayPtr) + return false; + auto &Array = *ArrayPtr; + if (Size && Array.size() != *Size) + return false; + for (auto &Item : Array) + if (!verifyNode(*Item.get())) + return false; + + return true; +} + +bool MetadataVerifier::verifyEntry( + msgpack::MapNode<> &MapNode, StringRef Key, bool Required, + function_ref verifyNode) { + auto Entry = MapNode.find(Key); + if (Entry == MapNode.end()) + return !Required; + return verifyNode(*Entry->second.get()); +} + +bool MetadataVerifier::verifyScalarEntry( + msgpack::MapNode<> &MapNode, StringRef Key, bool Required, + msgpack::ScalarNode::ScalarKind SKind, + function_ref verifyValue) { + return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) { + return verifyScalar(Node, SKind, verifyValue); + }); +} + +bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode<> &MapNode, + StringRef Key, bool Required) { + return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) { + return verifyInteger(Node); + }); +} + +bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) { + auto ArgsMapPtr = dyn_cast>(&Node); + if (!ArgsMapPtr) + return false; + auto &ArgsMap = *ArgsMapPtr; + + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::Name, false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::TypeName, false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyIntegerEntry(ArgsMap, Kernel::Arg::Key::Size, true)) + return false; + if (!verifyIntegerEntry(ArgsMap, Kernel::Arg::Key::Offset, true)) + return false; + if (!verifyScalarEntry( + ArgsMap, Kernel::Arg::Key::ValueKind, true, + msgpack::ScalarNode::SK_String, [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Arg::ValueKind::ByValue, true) + .Case(Kernel::Arg::ValueKind::GlobalBuffer, true) + .Case(Kernel::Arg::ValueKind::DynamicSharedPointer, true) + .Case(Kernel::Arg::ValueKind::Sampler, true) + .Case(Kernel::Arg::ValueKind::Image, true) + .Case(Kernel::Arg::ValueKind::Pipe, true) + .Case(Kernel::Arg::ValueKind::Queue, true) + .Case(Kernel::Arg::ValueKind::HiddenGlobalOffsetX, true) + .Case(Kernel::Arg::ValueKind::HiddenGlobalOffsetY, true) + .Case(Kernel::Arg::ValueKind::HiddenGlobalOffsetZ, true) + .Case(Kernel::Arg::ValueKind::HiddenNone, true) + .Case(Kernel::Arg::ValueKind::HiddenPrintfBuffer, true) + .Case(Kernel::Arg::ValueKind::HiddenDefaultQueue, true) + .Case(Kernel::Arg::ValueKind::HiddenCompletionAction, true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::ValueType, true, + msgpack::ScalarNode::SK_String, + [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Arg::ValueType::Struct, true) + .Case(Kernel::Arg::ValueType::I8, true) + .Case(Kernel::Arg::ValueType::U8, true) + .Case(Kernel::Arg::ValueType::I16, true) + .Case(Kernel::Arg::ValueType::U16, true) + .Case(Kernel::Arg::ValueType::F16, true) + .Case(Kernel::Arg::ValueType::I32, true) + .Case(Kernel::Arg::ValueType::U32, true) + .Case(Kernel::Arg::ValueType::F32, true) + .Case(Kernel::Arg::ValueType::I64, true) + .Case(Kernel::Arg::ValueType::U64, true) + .Case(Kernel::Arg::ValueType::F64, true) + .Default(false); + })) + return false; + if (!verifyIntegerEntry(ArgsMap, Kernel::Arg::Key::PointeeAlign, false)) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::AddressSpace, false, + msgpack::ScalarNode::SK_String, + [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Arg::AddressSpace::Private, true) + .Case(Kernel::Arg::AddressSpace::Global, true) + .Case(Kernel::Arg::AddressSpace::Constant, true) + .Case(Kernel::Arg::AddressSpace::Local, true) + .Case(Kernel::Arg::AddressSpace::Generic, true) + .Case(Kernel::Arg::AddressSpace::Region, true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::Access, false, + msgpack::ScalarNode::SK_String, + [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Arg::Access::ReadOnly, true) + .Case(Kernel::Arg::Access::WriteOnly, true) + .Case(Kernel::Arg::Access::ReadWrite, true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::ActualAccess, false, + msgpack::ScalarNode::SK_String, + [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Arg::Access::ReadOnly, true) + .Case(Kernel::Arg::Access::WriteOnly, true) + .Case(Kernel::Arg::Access::ReadWrite, true) + .Default(false); + })) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::IsConst, false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::IsRestrict, false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::IsVolatile, false, + msgpack::ScalarNode::SK_Boolean)) + return false; + if (!verifyScalarEntry(ArgsMap, Kernel::Arg::Key::IsPipe, false, + msgpack::ScalarNode::SK_Boolean)) + return false; + + return true; +} + +bool MetadataVerifier::verifyKernel(msgpack::Node &Node) { + auto KernelMapPtr = dyn_cast>(&Node); + if (!KernelMapPtr) + return false; + auto &KernelMap = *KernelMapPtr; + + if (!verifyScalarEntry(KernelMap, Kernel::Key::Name, true, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, Kernel::Key::Symbol, true, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, Kernel::Key::Language, false, + msgpack::ScalarNode::SK_String, + [this](msgpack::ScalarNode &SNode) { + return StringSwitch(SNode.getString()) + .Case(Kernel::Language::OpenCLC, true) + .Case(Kernel::Language::OpenCLCXX, true) + .Case(Kernel::Language::HCC, true) + .Case(Kernel::Language::HIP, true) + .Case(Kernel::Language::OpenMP, true) + .Case(Kernel::Language::Assembler, true) + .Default(false); + })) + return false; + if (!verifyEntry( + KernelMap, Kernel::Key::LanguageVersion, false, + [this](msgpack::Node &Node) { + return verifyArray( + Node, + [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2); + })) + return false; + if (!verifyEntry(KernelMap, Kernel::Key::Args, false, + [this](msgpack::Node &Node) { + return verifyArray(Node, [this](msgpack::Node &Node) { + return verifyKernelArgs(Node); + }); + })) + return false; + if (!verifyEntry(KernelMap, Kernel::Key::ReqdWorkGroupSize, false, + [this](msgpack::Node &Node) { + return verifyArray(Node, + [this](msgpack::Node &Node) { + return verifyInteger(Node); + }, + 3); + })) + return false; + if (!verifyEntry(KernelMap, Kernel::Key::WorkGroupSizeHint, false, + [this](msgpack::Node &Node) { + return verifyArray(Node, + [this](msgpack::Node &Node) { + return verifyInteger(Node); + }, + 3); + })) + return false; + if (!verifyScalarEntry(KernelMap, Kernel::Key::VecTypeHint, false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyScalarEntry(KernelMap, Kernel::Key::DeviceEnqueueSymbol, false, + msgpack::ScalarNode::SK_String)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::KernargSegmentSize, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::GroupSegmentFixedSize, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::PrivateSegmentFixedSize, + true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::KernargSegmentAlign, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::WavefrontSize, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::SGPRCount, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::VGPRCount, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::MaxFlatWorkGroupSize, true)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::SGPRSpillCount, false)) + return false; + if (!verifyIntegerEntry(KernelMap, Kernel::Key::VGPRSpillCount, false)) + return false; + + return true; +} + +bool MetadataVerifier::verify( + std::unique_ptr &HSAMetadataRoot) { + auto RootMapPtr = dyn_cast>(HSAMetadataRoot.get()); + if (!RootMapPtr) + return false; + auto &RootMap = *RootMapPtr; + + if (!verifyEntry( + RootMap, Key::Version, true, [this](msgpack::Node &Node) { + return verifyArray( + Node, + [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2); + })) + return false; + if (!verifyEntry(RootMap, Key::Printf, false, [this](msgpack::Node &Node) { + return verifyArray(Node, [this](msgpack::Node &Node) { + return verifyScalar(Node, msgpack::ScalarNode::SK_String); + }); + })) + return false; + if (!verifyEntry(RootMap, Key::Kernels, true, [this](msgpack::Node &Node) { + return verifyArray( + Node, [this](msgpack::Node &Node) { return verifyKernel(Node); }); + })) + return false; + + return true; +} + +} // end namespace V3 +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm Index: lib/BinaryFormat/CMakeLists.txt =================================================================== --- lib/BinaryFormat/CMakeLists.txt +++ lib/BinaryFormat/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMBinaryFormat + AMDGPUMetadataVerifier.cpp Dwarf.cpp Magic.cpp MsgPackReader.cpp Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -56,7 +56,7 @@ SIProgramInfo CurrentProgramInfo; DenseMap CallGraphResourceInfo; - AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream; + AMDGPU::HSAMD::MetadataStreamer *HSAMetadataStream; std::map PALMetadataMap; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; @@ -92,6 +92,7 @@ public: explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); + ~AMDGPUAsmPrinter(); StringRef getPassName() const override; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -45,6 +45,7 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; // TODO: This should get the default rounding mode from the kernel. We just set // the default here, but this could change if the OpenCL rounding mode pragmas @@ -99,8 +100,16 @@ std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) { AMDGPUASI = static_cast(&TM)->getAMDGPUAS(); + if (IsaInfo::hasCodeObjectV3(getSTI())) + HSAMetadataStream = new MetadataStreamerV3(); + else + HSAMetadataStream = new MetadataStreamerV2(); } +AMDGPUAsmPrinter::~AMDGPUAsmPrinter() { + delete HSAMetadataStream; +} + StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; } @@ -116,16 +125,12 @@ } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; - if (TM.getTargetTriple().getOS() != Triple::AMDHSA && TM.getTargetTriple().getOS() != Triple::AMDPAL) return; if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - HSAMetadataStream.begin(M); + HSAMetadataStream->begin(M); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) readPALMetadata(M); @@ -141,37 +146,38 @@ } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { - // TODO: Add metadata to code object v3. - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; - // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) return; - // Emit ISA Version (NT_AMD_AMDGPU_ISA). - std::string ISAVersionString; - raw_string_ostream ISAVersionStream(ISAVersionString); - IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); - getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit ISA Version (NT_AMD_AMDGPU_ISA). + std::string ISAVersionString; + raw_string_ostream ISAVersionStream(ISAVersionString); + IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); + getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + } // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { - HSAMetadataStream.end(); - getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata()); + HSAMetadataStream->end(); + bool Success = HSAMetadataStream->emitTo(*getTargetStreamer()); + (void)Success; + assert(Success && "Malformed HSA Metadata"); } - // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). - if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { - // Copy the PAL metadata from the map where we collected it into a vector, - // then write it as a .note. - PALMD::Metadata PALMetadataVector; - for (auto i : PALMetadataMap) { - PALMetadataVector.push_back(i.first); - PALMetadataVector.push_back(i.second); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + PALMD::Metadata PALMetadataVector; + for (auto i : PALMetadataMap) { + PALMetadataVector.push_back(i.first); + PALMetadataVector.push_back(i.second); + } + getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } - getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } } @@ -193,21 +199,20 @@ const SIMachineFunctionInfo &MFI = *MF->getInfo(); if (!MFI.isEntryFunction()) return; - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; - const AMDGPUSubtarget &STM = MF->getSubtarget(); - amd_kernel_code_t KernelCode; - if (STM.isAmdCodeObjectV2(MF->getFunction())) { - getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); - getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + const AMDGPUSubtarget &STM = MF->getSubtarget(); + amd_kernel_code_t KernelCode; + if (STM.isAmdCodeObjectV2(MF->getFunction())) { + getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); + getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); + } } if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); + HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo); } void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { Index: lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- lib/Target/AMDGPU/AMDGPUPTNote.h +++ lib/Target/AMDGPU/AMDGPUPTNote.h @@ -23,7 +23,8 @@ const char SectionName[] = ".note"; -const char NoteName[] = "AMD"; +const char NoteNameV2[] = "AMD"; +const char NoteNameV3[] = "AMDGPU"; // TODO: Remove this file once we drop code object v2. enum NoteType{ Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2733,8 +2733,13 @@ YamlStream.flush(); - if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) - return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + if (IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } else { + if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } return false; } Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h @@ -17,8 +17,10 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H #include "AMDGPU.h" +#include "AMDGPUTargetStreamer.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/Support/AMDGPUMetadata.h" namespace llvm { @@ -34,7 +36,86 @@ namespace AMDGPU { namespace HSAMD { -class MetadataStreamer final { +class MetadataStreamer { +public: + virtual ~MetadataStreamer() {}; + + virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; + + virtual void begin(const Module &Mod) = 0; + + virtual void end() = 0; + + virtual void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) = 0; +}; + +class MetadataStreamerV3 final : public MetadataStreamer { +private: + std::unique_ptr HSAMetadataRoot = msgpack::makePtr>(); + AMDGPUAS AMDGPUASI; + + void dump(StringRef HSAMetadataString) const; + + void verify(StringRef HSAMetadataString) const; + + Optional getAccessQualifier(StringRef AccQual) const; + + StringRef getAddressSpaceQualifer(unsigned AddressSpace) const; + + StringRef getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const; + + StringRef getValueType(Type *Ty, StringRef TypeName) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + std::unique_ptr> + getWorkGroupDimensions(MDNode *Node) const; + + std::unique_ptr> + getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const; + + void emitVersion(); + + void emitPrintf(const Module &Mod); + + void emitKernelLanguage(const Function &Func, msgpack::MapNode<> &Kern); + + void emitKernelAttrs(const Function &Func, msgpack::MapNode<> &Kern); + + void emitKernelArgs(const Function &Func, msgpack::MapNode<> &Kern); + + void emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode<> &Args); + + void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind, + unsigned &Offset, msgpack::ArrayNode<> &Args, + StringRef Name = "", StringRef TypeName = "", + StringRef BaseTypeName = "", StringRef AccQual = "", + StringRef TypeQual = ""); + + std::unique_ptr &getRootMetadata(StringRef Key) { + return (*cast>(HSAMetadataRoot.get()))[Key]; + } + + std::unique_ptr &getHSAMetadataRoot() { + return HSAMetadataRoot; + } + +public: + MetadataStreamerV3() = default; + ~MetadataStreamerV3() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; + + void end() override; + + void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override; +}; + +class MetadataStreamerV2 final : public MetadataStreamer { private: Metadata HSAMetadata; AMDGPUAS AMDGPUASI; @@ -80,19 +161,21 @@ StringRef BaseTypeName = "", StringRef AccQual = "", StringRef TypeQual = ""); -public: - MetadataStreamer() = default; - ~MetadataStreamer() = default; - const Metadata &getHSAMetadata() const { return HSAMetadata; } - void begin(const Module &Mod); +public: + MetadataStreamerV2() = default; + ~MetadataStreamerV2() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; - void end(); + void end() override; - void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo); + void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override; }; } // end namespace HSAMD Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp @@ -35,11 +35,14 @@ namespace AMDGPU { namespace HSAMD { -void MetadataStreamer::dump(StringRef HSAMetadataString) const { +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV2 +//===----------------------------------------------------------------------===// +void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamer::verify(StringRef HSAMetadataString) const { +void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; HSAMD::Metadata FromHSAMetadataString; @@ -62,7 +65,7 @@ } } -AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { +AccessQualifier MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { if (AccQual.empty()) return AccessQualifier::Unknown; @@ -73,7 +76,7 @@ .Default(AccessQualifier::Default); } -AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( +AddressSpaceQualifier MetadataStreamerV2::getAddressSpaceQualifer( unsigned AddressSpace) const { if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) return AddressSpaceQualifier::Private; @@ -91,7 +94,7 @@ llvm_unreachable("Unknown address space qualifier"); } -ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, +ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { if (TypeQual.find("pipe") != StringRef::npos) return ValueKind::Pipe; @@ -119,7 +122,7 @@ ValueKind::ByValue); } -ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { +ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { auto Signed = !TypeName.startswith("u"); @@ -151,7 +154,7 @@ } } -std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { +std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { if (!Signed) @@ -188,7 +191,7 @@ } } -std::vector MetadataStreamer::getWorkGroupDimensions( +std::vector MetadataStreamerV2::getWorkGroupDimensions( MDNode *Node) const { std::vector Dims; if (Node->getNumOperands() != 3) @@ -199,7 +202,7 @@ return Dims; } -Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( +Kernel::CodeProps::Metadata MetadataStreamerV2::getHSACodeProps( const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { const SISubtarget &STM = MF.getSubtarget(); @@ -224,7 +227,7 @@ return HSACodeProps; } -Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( +Kernel::DebugProps::Metadata MetadataStreamerV2::getHSADebugProps( const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { const SISubtarget &STM = MF.getSubtarget(); @@ -249,14 +252,14 @@ } -void MetadataStreamer::emitVersion() { +void MetadataStreamerV2::emitVersion() { auto &Version = HSAMetadata.mVersion; Version.push_back(VersionMajor); Version.push_back(VersionMinor); } -void MetadataStreamer::emitPrintf(const Module &Mod) { +void MetadataStreamerV2::emitPrintf(const Module &Mod) { auto &Printf = HSAMetadata.mPrintf; auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); @@ -268,7 +271,7 @@ Printf.push_back(cast(Op->getOperand(0))->getString()); } -void MetadataStreamer::emitKernelLanguage(const Function &Func) { +void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { auto &Kernel = HSAMetadata.mKernels.back(); // TODO: What about other languages? @@ -286,7 +289,7 @@ mdconst::extract(Op0->getOperand(1))->getZExtValue()); } -void MetadataStreamer::emitKernelAttrs(const Function &Func) { +void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { auto &Attrs = HSAMetadata.mKernels.back().mAttrs; if (auto Node = Func.getMetadata("reqd_work_group_size")) @@ -304,7 +307,7 @@ } } -void MetadataStreamer::emitKernelArgs(const Function &Func) { +void MetadataStreamerV2::emitKernelArgs(const Function &Func) { for (auto &Arg : Func.args()) emitKernelArg(Arg); @@ -340,7 +343,7 @@ } } -void MetadataStreamer::emitKernelArg(const Argument &Arg) { +void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { auto Func = Arg.getParent(); auto ArgNo = Arg.getArgNo(); const MDNode *Node; @@ -382,7 +385,7 @@ TypeName, BaseTypeName, AccQual, TypeQual); } -void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, +void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, StringRef Name, StringRef TypeName, StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { @@ -423,13 +426,17 @@ } } -void MetadataStreamer::begin(const Module &Mod) { +bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); +} + +void MetadataStreamerV2::begin(const Module &Mod) { AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } -void MetadataStreamer::end() { +void MetadataStreamerV2::end() { std::string HSAMetadataString; if (toString(HSAMetadata, HSAMetadataString)) return; @@ -440,7 +447,7 @@ verify(HSAMetadataString); } -void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { +void MetadataStreamerV2::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); auto CodeProps = getHSACodeProps(MF, ProgramInfo); auto DebugProps = getHSADebugProps(MF, ProgramInfo); @@ -460,6 +467,461 @@ HSAMetadata.mKernels.back().mDebugProps = DebugProps; } +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV3 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; + + std::unique_ptr FromHSAMetadataString + = make_unique>(); + + yaml::Input YIn(HSAMetadataString); + YIn >> FromHSAMetadataString; + if (YIn.error()) { + errs() << "FAIL\n"; + return; + } + + std::string ToHSAMetadataString; + raw_string_ostream StrOS(ToHSAMetadataString); + yaml::Output YOut(StrOS); + YOut << FromHSAMetadataString; + + errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") + << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << StrOS.str() << '\n'; + } +} + +Optional MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { + return StringSwitch>(AccQual) + .Case("read_only", StringRef(V3::Kernel::Arg::Access::ReadOnly)) + .Case("write_only", StringRef(V3::Kernel::Arg::Access::WriteOnly)) + .Case("read_write", StringRef(V3::Kernel::Arg::Access::ReadWrite)) + .Default(None); +} + +StringRef +MetadataStreamerV3::getAddressSpaceQualifer(unsigned AddressSpace) const { + if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Private; + if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Global; + if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Constant; + if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Local; + if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Generic; + if (AddressSpace == AMDGPUASI.REGION_ADDRESS) + return V3::Kernel::Arg::AddressSpace::Region; + + llvm_unreachable("Unknown address space qualifier"); +} + +StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return V3::Kernel::Arg::ValueKind::Pipe; + + return StringSwitch(BaseTypeName) + .Case("image1d_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image1d_array_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image1d_buffer_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_array_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_array_depth_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_array_msaa_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_array_msaa_depth_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_depth_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_msaa_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image2d_msaa_depth_t", V3::Kernel::Arg::ValueKind::Image) + .Case("image3d_t", V3::Kernel::Arg::ValueKind::Image) + .Case("sampler_t", V3::Kernel::Arg::ValueKind::Sampler) + .Case("queue_t", V3::Kernel::Arg::ValueKind::Queue) + .Default(isa(Ty) + ? (Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS + ? V3::Kernel::Arg::ValueKind::DynamicSharedPointer + : V3::Kernel::Arg::ValueKind::GlobalBuffer) + : V3::Kernel::Arg::ValueKind::ByValue); +} + +StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? V3::Kernel::Arg::ValueType::I8 : V3::Kernel::Arg::ValueType::U8; + case 16: + return Signed ? V3::Kernel::Arg::ValueType::I16 : V3::Kernel::Arg::ValueType::U16; + case 32: + return Signed ? V3::Kernel::Arg::ValueType::I32 : V3::Kernel::Arg::ValueType::U32; + case 64: + return Signed ? V3::Kernel::Arg::ValueType::I64 : V3::Kernel::Arg::ValueType::U64; + default: + return V3::Kernel::Arg::ValueType::Struct; + } + } + case Type::HalfTyID: + return V3::Kernel::Arg::ValueType::F16; + case Type::FloatTyID: + return V3::Kernel::Arg::ValueType::F32; + case Type::DoubleTyID: + return V3::Kernel::Arg::ValueType::F64; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return V3::Kernel::Arg::ValueType::Struct; + } +} + +std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::unique_ptr> +MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { + auto Dims = make_unique>(); + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims->push_back(make_unique( + mdconst::extract(Op)->getZExtValue())); + return Dims; +} + +void MetadataStreamerV3::emitVersion() { + auto Version = make_unique>(); + Version->push_back(make_unique(V3::VersionMajor)); + Version->push_back(make_unique(V3::VersionMinor)); + getRootMetadata(V3::Key::Version) = std::move(Version); +} + +void MetadataStreamerV3::emitPrintf(const Module &Mod) { + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + auto Printf = make_unique>(); + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf->push_back(make_unique(cast(Op->getOperand(0))->getString())); + getRootMetadata(V3::Key::Printf) = std::move(Printf); +} + +void MetadataStreamerV3::emitKernelLanguage(const Function &Func, + msgpack::MapNode<> &Kern) { + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kern[V3::Kernel::Key::Language] = + make_unique(V3::Kernel::Language::OpenCLC); + auto LanguageVersion = make_unique>(); + LanguageVersion->push_back(make_unique( + mdconst::extract(Op0->getOperand(0))->getZExtValue())); + LanguageVersion->push_back(make_unique( + mdconst::extract(Op0->getOperand(1))->getZExtValue())); + Kern[V3::Kernel::Key::LanguageVersion] = std::move(LanguageVersion); +} + +void MetadataStreamerV3::emitKernelAttrs(const Function &Func, + msgpack::MapNode<> &Kern) { + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Kern[V3::Kernel::Key::ReqdWorkGroupSize] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Kern[V3::Kernel::Key::WorkGroupSizeHint] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Kern[V3::Kernel::Key::VecTypeHint] = make_unique( + getTypeName(cast(Node->getOperand(0))->getType(), + mdconst::extract(Node->getOperand(1)) + ->getZExtValue())); + } + if (Func.hasFnAttribute("runtime-handle")) { + Kern[V3::Kernel::Key::DeviceEnqueueSymbol] = make_unique( + Func.getFnAttribute("runtime-handle").getValueAsString().str()); + } +} + +void MetadataStreamerV3::emitKernelArgs(const Function &Func, + msgpack::MapNode<> &Kern) { + unsigned Offset = 0; + auto Args = make_unique>(); + for (auto &Arg : Func.args()) + emitKernelArg(Arg, Offset, *Args); + + // TODO: What about other languages? + if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + emitKernelArg(DL, Int64Ty, V3::Kernel::Arg::ValueKind::HiddenGlobalOffsetX, + Offset, *Args); + emitKernelArg(DL, Int64Ty, V3::Kernel::Arg::ValueKind::HiddenGlobalOffsetY, + Offset, *Args); + emitKernelArg(DL, Int64Ty, V3::Kernel::Arg::ValueKind::HiddenGlobalOffsetZ, + Offset, *Args); + + auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), + AMDGPUASI.GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenPrintfBuffer, + Offset, *Args); + else + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenNone, Offset, + *Args); + + // Emit "default queue" and "completion action" arguments if enqueue kernel is + // used, otherwise emit dummy "none" arguments. + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenDefaultQueue, + Offset, *Args); + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenCompletionAction, + Offset, *Args); + } else { + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenNone, Offset, + *Args); + emitKernelArg(DL, Int8PtrTy, V3::Kernel::Arg::ValueKind::HiddenNone, Offset, + *Args); + } + } + + Kern[V3::Kernel::Key::Args] = std::move(Args); +} + +void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode<> &Args) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast(Node->getOperand(ArgNo))->getString(); + } + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast(Node->getOperand(ArgNo))->getString(); + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, + Args, Name, TypeName, BaseTypeName, AccQual, TypeQual); +} + +void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, + StringRef ValueKind, unsigned &Offset, + msgpack::ArrayNode<> &Args, StringRef Name, + StringRef TypeName, StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + auto ArgPtr = make_unique>(); + auto &Arg = *ArgPtr; + + if (!Name.empty()) + Arg[V3::Kernel::Arg::Key::Name] = make_unique(Name); + if (!TypeName.empty()) + Arg[V3::Kernel::Arg::Key::TypeName] = make_unique(TypeName); + auto Size = DL.getTypeAllocSize(Ty); + auto Align = DL.getABITypeAlignment(Ty); + Arg[V3::Kernel::Arg::Key::Size] = make_unique(Size); + Offset = alignTo(Offset, Align); + Arg[V3::Kernel::Arg::Key::Offset] = make_unique(Offset); + Offset += Size; + Arg[V3::Kernel::Arg::Key::ValueKind] = + make_unique(ValueKind); + Arg[V3::Kernel::Arg::Key::ValueType] = + make_unique(getValueType(Ty, BaseTypeName)); + + if (auto PtrTy = dyn_cast(Ty)) { + auto ElTy = PtrTy->getElementType(); + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) + Arg[V3::Kernel::Arg::Key::PointeeAlign] = + make_unique(DL.getABITypeAlignment(ElTy)); + } + + if (auto PtrTy = dyn_cast(Ty)) + Arg[V3::Kernel::Arg::Key::AddressSpace] = make_unique( + getAddressSpaceQualifer(PtrTy->getAddressSpace())); + + if (auto AQ = getAccessQualifier(AccQual)) + Arg[V3::Kernel::Arg::Key::Access] = make_unique(*AQ); + + // TODO: Emit Arg[V3::Kernel::Arg::Key::ActualAccess]. + + SmallVector SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + if (Key == "const") + Arg[V3::Kernel::Arg::Key::IsConst] = make_unique(true); + else if (Key == "restrict") + Arg[V3::Kernel::Arg::Key::IsRestrict] = + make_unique(true); + else if (Key == "volatile") + Arg[V3::Kernel::Arg::Key::IsVolatile] = + make_unique(true); + else if (Key == "pipe") + Arg[V3::Kernel::Arg::Key::IsPipe] = make_unique(true); + } + + Args.push_back(std::move(ArgPtr)); +} + +std::unique_ptr> +MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const SISubtarget &STM = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + + auto HSAKernelProps = make_unique>(); + auto &Kern = *HSAKernelProps; + + Kern[V3::Kernel::Key::KernargSegmentSize] = make_unique( + STM.getKernArgSegmentSize(MF.getFunction(), MFI.getABIArgOffset())); + Kern[V3::Kernel::Key::GroupSegmentFixedSize] = + make_unique(ProgramInfo.LDSSize); + Kern[V3::Kernel::Key::PrivateSegmentFixedSize] = + make_unique(ProgramInfo.ScratchSize); + Kern[V3::Kernel::Key::KernargSegmentAlign] = make_unique( + std::max(uint32_t(4), MFI.getMaxKernArgAlign())); + Kern[V3::Kernel::Key::WavefrontSize] = + make_unique(STM.getWavefrontSize()); + Kern[V3::Kernel::Key::SGPRCount] = + make_unique(ProgramInfo.NumSGPR); + Kern[V3::Kernel::Key::VGPRCount] = + make_unique(ProgramInfo.NumVGPR); + Kern[V3::Kernel::Key::MaxFlatWorkGroupSize] = + make_unique(MFI.getMaxFlatWorkGroupSize()); + Kern[V3::Kernel::Key::SGPRSpillCount] = + make_unique(MFI.getNumSpilledSGPRs()); + Kern[V3::Kernel::Key::VGPRSpillCount] = + make_unique(MFI.getNumSpilledVGPRs()); + + return HSAKernelProps; +} + + +bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); +} + +void MetadataStreamerV3::begin(const Module &Mod) { + AMDGPUASI = getAMDGPUAS(Mod); + emitVersion(); + emitPrintf(Mod); + getRootMetadata(V3::Key::Kernels).reset(new msgpack::ArrayNode<>()); +} + +void MetadataStreamerV3::end() { + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + if (DumpHSAMetadata) + dump(StrOS.str()); + if (VerifyHSAMetadata) + verify(StrOS.str()); +} + +void MetadataStreamerV3::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { + auto &Func = MF.getFunction(); + auto KernelProps = getHSAKernelProps(MF, ProgramInfo); + + if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return; + + auto &KernelsNode = getRootMetadata(V3::Key::Kernels); + auto Kernels = cast>(KernelsNode.get()); + + { + auto &Kern = *KernelProps; + Kern[V3::Kernel::Key::Name] = make_unique(Func.getName()); + Kern[V3::Kernel::Key::Symbol] = make_unique( + (Twine(Func.getName()) + Twine(".kd")).str()); + emitKernelLanguage(Func, Kern); + emitKernelAttrs(Func, Kern); + emitKernelArgs(Func, Kern); + } + + Kernels->push_back(std::move(KernelProps)); +} + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #include "AMDKernelCodeT.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" @@ -56,7 +57,20 @@ virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; /// \returns True on success, false on failure. - virtual bool EmitHSAMetadata(StringRef HSAMetadataString); + virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString); + + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString); + + /// Emit HSA Metadata + /// + /// When \p Strict is true, known metadata elements must already be + /// well-typed. When \p Strict is false, known types are inferred and + /// the \p HSAMetadata structure is updated with the correct types. + /// + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadata(std::unique_ptr &HSAMetadata, + bool Strict) = 0; /// \returns True on success, false on failure. virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0; @@ -88,6 +102,10 @@ bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::unique_ptr &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. @@ -101,8 +119,8 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; - void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType, - function_ref EmitDesc); + void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, + function_ref EmitDesc); public: AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); @@ -124,6 +142,10 @@ bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::unique_ptr &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -17,7 +17,9 @@ #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -34,6 +36,7 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; //===----------------------------------------------------------------------===// // AMDGPUTargetStreamer @@ -117,7 +120,7 @@ return Entry->Name; } -bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { +bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) return false; @@ -125,6 +128,15 @@ return EmitHSAMetadata(HSAMetadata); } +bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { + std::unique_ptr HSAMetadataRoot; + yaml::Input YIn(HSAMetadataString); + YIn >> HSAMetadataRoot; + if (YIn.error()) + return false; + return EmitHSAMetadata(HSAMetadataRoot, false); +} + //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer //===----------------------------------------------------------------------===// @@ -180,9 +192,26 @@ if (HSAMD::toString(HSAMetadata, HSAMetadataString)) return false; - OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n'; + OS << '\t' << AssemblerDirectiveBegin << '\n'; OS << HSAMetadataString << '\n'; - OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n'; + OS << '\t' << AssemblerDirectiveEnd << '\n'; + return true; +} + +bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( + std::unique_ptr &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + OS << '\t' << AssemblerDirectiveBegin << '\n'; + OS << StrOS.str() << '\n'; + OS << '\t' << AssemblerDirectiveEnd << '\n'; return true; } @@ -226,13 +255,13 @@ return static_cast(Streamer); } -void AMDGPUTargetELFStreamer::EmitAMDGPUNote( - const MCExpr *DescSZ, unsigned NoteType, +void AMDGPUTargetELFStreamer::EmitNote( + StringRef Name, const MCExpr *DescSZ, unsigned NoteType, function_ref EmitDesc) { auto &S = getStreamer(); auto &Context = S.getContext(); - auto NameSZ = sizeof(ElfNote::NoteName); + auto NameSZ = Name.size() + 1; S.PushSection(); S.SwitchSection(Context.getELFSection( @@ -240,7 +269,7 @@ S.EmitIntValue(NameSZ, 4); // namesz S.EmitValue(DescSZ, 4); // descz S.EmitIntValue(NoteType, 4); // type - S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name + S.EmitBytes(Name); // name S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 @@ -251,7 +280,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) { - EmitAMDGPUNote( + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS){ @@ -274,7 +303,7 @@ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + VendorNameSize + ArchNameSize; - EmitAMDGPUNote( + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { @@ -317,7 +346,7 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, [&](MCELFStreamer &OS) { @@ -330,6 +359,38 @@ } bool AMDGPUTargetELFStreamer::EmitHSAMetadata( + std::unique_ptr &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + msgpack::Writer MPWriter(StrOS); + HSAMetadataRoot->write(MPWriter); + + // Create two labels to mark the beginning and end of the desc field + // and a MCExpr to calculate the size of the desc field. + auto &Context = getContext(); + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + EmitNote(ElfNote::NoteNameV3, + DescSZ, + ELF::NT_AMDGPU_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(StrOS.str()); + OS.EmitLabel(DescEnd); + } + ); + return true; +} + +bool AMDGPUTargetELFStreamer::EmitHSAMetadata( const AMDGPU::HSAMD::Metadata &HSAMetadata) { std::string HSAMetadataString; if (HSAMD::toString(HSAMetadata, HSAMetadataString)) @@ -344,7 +405,7 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, [&](MCELFStreamer &OS) { @@ -358,7 +419,7 @@ bool AMDGPUTargetELFStreamer::EmitPALMetadata( const PALMD::Metadata &PALMetadata) { - EmitAMDGPUNote( + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()), ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS){ Index: test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -0,0 +1,145 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -mattr=+code-object-v3 -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +; CHECK-LABEL: {{^}}min_64_max_64: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_64_max_64() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-flat-work-group-size"="64,64"} + +; CHECK-LABEL: {{^}}min_64_max_128: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_64_max_128() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-flat-work-group-size"="64,128"} + +; CHECK-LABEL: {{^}}min_128_max_128: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForWavesPerEU: 1 +; CHECK: NumVGPRsForWavesPerEU: 1 +define amdgpu_kernel void @min_128_max_128() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} + +; CHECK-LABEL: {{^}}min_1024_max_2048 +; CHECK: SGPRBlocks: 1 +; CHECK: VGPRBlocks: 7 +; CHECK: NumSGPRsForWavesPerEU: 12 +; CHECK: NumVGPRsForWavesPerEU: 32 +@var = addrspace(1) global float 0.0 +define amdgpu_kernel void @min_1024_max_2048() #3 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + %val31 = load volatile float, float addrspace(1)* @var + %val32 = load volatile float, float addrspace(1)* @var + %val33 = load volatile float, float addrspace(1)* @var + %val34 = load volatile float, float addrspace(1)* @var + %val35 = load volatile float, float addrspace(1)* @var + %val36 = load volatile float, float addrspace(1)* @var + %val37 = load volatile float, float addrspace(1)* @var + %val38 = load volatile float, float addrspace(1)* @var + %val39 = load volatile float, float addrspace(1)* @var + %val40 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + store volatile float %val31, float addrspace(1)* @var + store volatile float %val32, float addrspace(1)* @var + store volatile float %val33, float addrspace(1)* @var + store volatile float %val34, float addrspace(1)* @var + store volatile float %val35, float addrspace(1)* @var + store volatile float %val36, float addrspace(1)* @var + store volatile float %val37, float addrspace(1)* @var + store volatile float %val38, float addrspace(1)* @var + store volatile float %val39, float addrspace(1)* @var + store volatile float %val40, float addrspace(1)* @var + + ret void +} +attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} + +; CHECK: amdhsa.kernels: +; CHECK: .max_flat_workgroup_size: 64 +; CHECK: .name: min_64_max_64 +; CHECK: .max_flat_workgroup_size: 128 +; CHECK: .name: min_64_max_128 +; CHECK: .max_flat_workgroup_size: 128 +; CHECK: .name: min_128_max_128 +; CHECK: .max_flat_workgroup_size: 2048 +; CHECK: .name: min_1024_max_2048 +; CHECK: amdhsa.version: +; CHECK: - 1 +; CHECK: - 0 + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/code-object-v3.ll =================================================================== --- test/CodeGen/AMDGPU/code-object-v3.ll +++ test/CodeGen/AMDGPU/code-object-v3.ll @@ -14,7 +14,6 @@ ; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_version ; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_isa ; OSABI-AMDHSA-ASM-NOT: .amd_amdgpu_isa -; OSABI-AMDHSA-ASM-NOT: .amd_amdgpu_hsa_metadata ; OSABI-AMDHSA-ASM-NOT: .amd_amdgpu_pal_metadata ; OSABI-AMDHSA-ELF: Section Headers @@ -31,8 +30,6 @@ ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fadd.kd ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000040 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fsub.kd -; OSABI-AMDHSA-ELF-NOT: Displaying notes found - define amdgpu_kernel void @fadd( float addrspace(1)* %r, float addrspace(1)* %a, Index: test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll @@ -0,0 +1,33 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; CHECK: .symbol: test_ro_arg.kd +; CHECK: .name: test_ro_arg +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'float*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: in +; CHECK-NEXT: .access: read_only +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .is_const: true +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_restrict: true +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'float*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: out +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .address_space: global + +define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out) + !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 + !kernel_arg_base_type !2 !kernel_arg_type_qual !3 { + ret void +} + +!0 = !{i32 1, i32 1} +!1 = !{!"none", !"none"} +!2 = !{!"float*", !"float*"} +!3 = !{!"const restrict", !""} Index: test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll @@ -0,0 +1,101 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test_non_enqueue_kernel_caller.kd +; CHECK: .name: test_non_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NOT: .value_kind: hidden_default_queue +; CHECK-NOT: .value_kind: hidden_completion_action +define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_enqueue_kernel_caller.kd +; CHECK: .name: test_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_default_queue +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_completion_action +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK-NOT: amdhsa.printf: + +attributes #0 = { "calls-enqueue-kernel" } + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"char"} +!4 = !{!""} + +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0} + + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -0,0 +1,1441 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s + +%struct.A = type { i8, float } +%opencl.image1d_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque +%opencl.queue_t = type opaque +%opencl.pipe_t = type opaque +%struct.B = type { i32 addrspace(1)*} +%opencl.clk_event_t = type opaque + +@__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test_char.kd +; CHECK: .name: test_char +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NOT: .value_kind: hidden_default_queue +; CHECK-NOT: .value_kind: hidden_completion_action +define amdgpu_kernel void @test_char(i8 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 + !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_ushort2.kd +; CHECK: .name: test_ushort2 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: ushort2 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: u16 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_ushort2(<2 x i16> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 + !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_int3.kd +; CHECK: .name: test_int3 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int3 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_int3(<3 x i32> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 + !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_ulong4.kd +; CHECK: .name: test_ulong4 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: ulong4 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 32 +; CHECK-NEXT: .value_type: u64 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_ulong4(<4 x i64> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 + !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_half8.kd +; CHECK: .name: test_half8 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: half8 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_half8(<8 x half> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 + !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_float16.kd +; CHECK: .name: test_float16 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: float16 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 64 +; CHECK-NEXT: .value_type: f32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 88 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_float16(<16 x float> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 + !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_double16.kd +; CHECK: .name: test_double16 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: double16 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 128 +; CHECK-NEXT: .value_type: f64 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 128 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 136 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 144 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 152 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_double16(<16 x double> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 + !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_pointer.kd +; CHECK: .name: test_pointer +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 + !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_image.kd +; CHECK: .name: test_image +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: image2d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 + !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_sampler.kd +; CHECK: .name: test_sampler +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: sampler_t +; CHECK-NEXT: .value_kind: sampler +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_sampler(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 + !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_queue.kd +; CHECK: .name: test_queue +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: queue_t +; CHECK-NEXT: .value_kind: queue +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 + !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_struct.kd +; CHECK: .name: test_struct +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: struct A +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: private +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 + !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_i128.kd +; CHECK: .name: test_i128 +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: i128 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_i128(i128 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 + !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_multi_arg.kd +; CHECK: .name: test_multi_arg +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .type_name: short2 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 4 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i16 +; CHECK-NEXT: .name: b +; CHECK-NEXT: - .type_name: char3 +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: c +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 + !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_addr_space.kd +; CHECK: .name: test_addr_space +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: g +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: constant +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: l +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, + i32 addrspace(4)* %c, + i32 addrspace(3)* %l) + !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_type_qual.kd +; CHECK: .name: test_type_qual +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_volatile: true +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .is_const: true +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .is_restrict: true +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'int addrspace(5)*' +; CHECK-NEXT: .value_kind: pipe +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .is_pipe: true +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, + i32 addrspace(1)* %b, + %opencl.pipe_t addrspace(1)* %c) + !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 + !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { + ret void +} + +; CHECK: .symbol: test_access_qual.kd +; CHECK: .name: test_access_qual +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: image1d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: ro +; CHECK-NEXT: .access: read_only +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: image2d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: wo +; CHECK-NEXT: .access: write_only +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: image3d_t +; CHECK-NEXT: .value_kind: image +; CHECK-NEXT: .name: rw +; CHECK-NEXT: .access: read_write +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, + %opencl.image2d_t addrspace(1)* %wo, + %opencl.image3d_t addrspace(1)* %rw) + !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 + !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_half.kd +; CHECK: .name: test_vec_type_hint_half +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: half +define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_float.kd +; CHECK: .name: test_vec_type_hint_float +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: float +define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_double.kd +; CHECK: .name: test_vec_type_hint_double +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: double +define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_char.kd +; CHECK: .name: test_vec_type_hint_char +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: char +define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_short.kd +; CHECK: .name: test_vec_type_hint_short +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: short +define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_long.kd +; CHECK: .name: test_vec_type_hint_long +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: long +define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { + ret void +} + +; CHECK: .symbol: test_vec_type_hint_unknown.kd +; CHECK: .name: test_vec_type_hint_unknown +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: unknown +define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { + ret void +} + +; CHECK: .reqd_workgroup_size: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 4 +; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd +; CHECK: .name: test_reqd_wgs_vec_type_hint +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: int +define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 + !reqd_work_group_size !6 { + ret void +} + +; CHECK: .symbol: test_wgs_hint_vec_type_hint.kd +; CHECK: .workgroup_size_hint: +; CHECK-NEXT: - 8 +; CHECK-NEXT: - 16 +; CHECK-NEXT: - 32 +; CHECK: .name: test_wgs_hint_vec_type_hint +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: int +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK: .vec_type_hint: uint4 +define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 + !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 + !work_group_size_hint !8 { + ret void +} + +; CHECK: .symbol: test_arg_ptr_to_ptr.kd +; CHECK: .name: test_arg_ptr_to_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'int addrspace(5)* addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 + !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_struct_contains_ptr.kd +; CHECK: .name: test_arg_struct_contains_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: struct B +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: private +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 + !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_vector_of_ptr.kd +; CHECK: .name: test_arg_vector_of_ptr +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 16 +; CHECK-NEXT: .value_type: i32 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 + !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_arg_unknown_builtin_type.kd +; CHECK: .name: test_arg_unknown_builtin_type +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: clk_event_t +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_arg_unknown_builtin_type( + %opencl.clk_event_t addrspace(1)* %a) + !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 + !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_pointee_align.kd +; CHECK: .name: test_pointee_align +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: 'long addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .type_name: 'char addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 1 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char2 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 12 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 2 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char3 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: d +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char4 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: e +; CHECK-NEXT: .offset: 20 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 4 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char8 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: f +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 8 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .type_name: 'char16 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .name: g +; CHECK-NEXT: .offset: 28 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .pointee_align: 16 +; CHECK-NEXT: .address_space: local +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, + i8 addrspace(3)* %b, + <2 x i8> addrspace(3)* %c, + <3 x i8> addrspace(3)* %d, + <4 x i8> addrspace(3)* %e, + <8 x i8> addrspace(3)* %f, + <16 x i8> addrspace(3)* %g) + !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 + !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} + +; CHECK: .symbol: __test_block_invoke_kernel.kd +; CHECK: .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle +; CHECK: .name: __test_block_invoke_kernel +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: __block_literal +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 25 +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: .name: arg +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @__test_block_invoke_kernel( + <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 + !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: .symbol: test_enqueue_kernel_caller.kd +; CHECK: .name: test_enqueue_kernel_caller +; CHECK: .language: OpenCL C +; CHECK: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .args: +; CHECK-NEXT: - .type_name: char +; CHECK-NEXT: .value_kind: by_value +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 1 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .name: a +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_printf_buffer +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_default_queue +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_completion_action +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 + !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: amdhsa.printf: +; CHECK-NEXT: - '1:1:4:%d\n' +; CHECK-NEXT: - '2:1:8:%g\n' + +attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #1 = { "calls-enqueue-kernel" } + +!llvm.printf.fmts = !{!100, !101} + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"int"} +!4 = !{!""} +!5 = !{i32 undef, i32 1} +!6 = !{i32 1, i32 2, i32 4} +!7 = !{<4 x i32> undef, i32 0} +!8 = !{i32 8, i32 16, i32 32} +!9 = !{!"char"} +!10 = !{!"ushort2"} +!11 = !{!"int3"} +!12 = !{!"ulong4"} +!13 = !{!"half8"} +!14 = !{!"float16"} +!15 = !{!"double16"} +!16 = !{!"int addrspace(5)*"} +!17 = !{!"image2d_t"} +!18 = !{!"sampler_t"} +!19 = !{!"queue_t"} +!20 = !{!"struct A"} +!21 = !{!"i128"} +!22 = !{i32 0, i32 0, i32 0} +!23 = !{!"none", !"none", !"none"} +!24 = !{!"int", !"short2", !"char3"} +!25 = !{!"", !"", !""} +!26 = !{half undef, i32 1} +!27 = !{float undef, i32 1} +!28 = !{double undef, i32 1} +!29 = !{i8 undef, i32 1} +!30 = !{i16 undef, i32 1} +!31 = !{i64 undef, i32 1} +!32 = !{i32 addrspace(5)*undef, i32 1} +!50 = !{i32 1, i32 2, i32 3} +!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"} +!60 = !{i32 1, i32 1, i32 1} +!61 = !{!"read_only", !"write_only", !"read_write"} +!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} +!70 = !{!"volatile", !"const restrict", !"pipe"} +!80 = !{!"int addrspace(5)* addrspace(5)*"} +!81 = !{i32 1} +!82 = !{!"struct B"} +!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"} +!84 = !{!"clk_event_t"} +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0} +!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} +!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} +!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"} +!94 = !{!"", !"", !"", !"", !"", !"", !""} +!100 = !{!"1:1:4:%d\5Cn"} +!101 = !{!"2:1:8:%g\5Cn"} +!110 = !{!"__block_literal"} + +; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -0,0 +1,72 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .args: +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: global_buffer +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_global_offset_x +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_y +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_global_offset_z +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +; CHECK-NEXT: - .value_kind: hidden_none +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: .address_space: global +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +!opencl.ocl.version = !{!0} +!0 = !{i32 2, i32 0} Index: test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll @@ -0,0 +1,95 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +%opencl.image1d_t = type opaque +%opencl.image1d_array_t = type opaque +%opencl.image1d_buffer_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image2d_array_t = type opaque +%opencl.image2d_array_depth_t = type opaque +%opencl.image2d_array_msaa_t = type opaque +%opencl.image2d_array_msaa_depth_t = type opaque +%opencl.image2d_depth_t = type opaque +%opencl.image2d_msaa_t = type opaque +%opencl.image2d_msaa_depth_t = type opaque +%opencl.image3d_t = type opaque + +; CHECK: --- +; CHECK: amdhsa.kernels: +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .args: +; CHECK: - .type_name: image1d_t +; CHECK: .value_kind: image +; CHECK: .name: a +; CHECK: .size: 8 +; CHECK: - .type_name: image1d_array_t +; CHECK: .value_kind: image +; CHECK: .name: b +; CHECK: .size: 8 +; CHECK: - .type_name: image1d_buffer_t +; CHECK: .value_kind: image +; CHECK: .name: c +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_t +; CHECK: .value_kind: image +; CHECK: .name: d +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_t +; CHECK: .value_kind: image +; CHECK: .name: e +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_depth_t +; CHECK: .value_kind: image +; CHECK: .name: f +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_msaa_t +; CHECK: .value_kind: image +; CHECK: .name: g +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_array_msaa_depth_t +; CHECK: .value_kind: image +; CHECK: .name: h +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_depth_t +; CHECK: .value_kind: image +; CHECK: .name: i +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_msaa_t +; CHECK: .value_kind: image +; CHECK: .name: j +; CHECK: .size: 8 +; CHECK: - .type_name: image2d_msaa_depth_t +; CHECK: .value_kind: image +; CHECK: .name: k +; CHECK: .size: 8 +; CHECK: - .type_name: image3d_t +; CHECK: .value_kind: image +; CHECK: .name: l +; CHECK: .size: 8 +define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, + %opencl.image1d_array_t addrspace(1)* %b, + %opencl.image1d_buffer_t addrspace(1)* %c, + %opencl.image2d_t addrspace(1)* %d, + %opencl.image2d_array_t addrspace(1)* %e, + %opencl.image2d_array_depth_t addrspace(1)* %f, + %opencl.image2d_array_msaa_t addrspace(1)* %g, + %opencl.image2d_array_msaa_depth_t addrspace(1)* %h, + %opencl.image2d_depth_t addrspace(1)* %i, + %opencl.image2d_msaa_t addrspace(1)* %j, + %opencl.image2d_msaa_depth_t addrspace(1)* %k, + %opencl.image3d_t addrspace(1)* %l) + !kernel_arg_type !1 !kernel_arg_base_type !1 { + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", + !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", + !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", + !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t", + !"image3d_t"} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll @@ -0,0 +1,11 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll @@ -0,0 +1,12 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{!0} +!0 = !{} Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll @@ -0,0 +1,12 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s + +; Make sure llc does not crash for invalid opencl version metadata. + +; CHECK: --- +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 +; CHECK: ... + +!opencl.ocl.version = !{!0} +!0 = !{i32 1} Index: test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -0,0 +1,140 @@ +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=NOTES %s + +@var = addrspace(1) global float 0.0 + +; CHECK: --- +; CHECK: amdhsa.kernels: + +; CHECK: - .max_flat_workgroup_size: 256 +; CHECK: .kernarg_segment_size: 24 +; CHECK: .private_segment_fixed_size: 0 +; CHECK: .wavefront_size: 64 +; CHECK: .symbol: test.kd +; CHECK: .name: test +; CHECK: .sgpr_count: 6 +; CHECK: .kernarg_segment_align: 8 +; CHECK: .vgpr_count: 3 +; CHECK: .group_segment_fixed_size: 0 +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: .symbol: num_spilled_sgprs.kd +; CHECK: .name: num_spilled_sgprs +; CHECK: .sgpr_spill_count: 41 +define amdgpu_kernel void @num_spilled_sgprs( + i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, + i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, + i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8, + i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, + i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute, + i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4, + i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb, + i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { +entry: + store i32 %in0, i32 addrspace(1)* %out0 + store i32 %in1, i32 addrspace(1)* %out1 + store i32 %in2, i32 addrspace(1)* %out2 + store i32 %in3, i32 addrspace(1)* %out3 + store i32 %in4, i32 addrspace(1)* %out4 + store i32 %in5, i32 addrspace(1)* %out5 + store i32 %in6, i32 addrspace(1)* %out6 + store i32 %in7, i32 addrspace(1)* %out7 + store i32 %in8, i32 addrspace(1)* %out8 + store i32 %in9, i32 addrspace(1)* %out9 + store i32 %ina, i32 addrspace(1)* %outa + store i32 %inb, i32 addrspace(1)* %outb + store i32 %inc, i32 addrspace(1)* %outc + store i32 %ind, i32 addrspace(1)* %outd + store i32 %ine, i32 addrspace(1)* %oute + store i32 %inf, i32 addrspace(1)* %outf + ret void +} + +; CHECK: .symbol: num_spilled_vgprs.kd +; CHECK: .name: num_spilled_vgprs +; CHECK: .vgpr_spill_count: 14 +define amdgpu_kernel void @num_spilled_vgprs() #1 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + + ret void +} + +; CHECK: amdhsa.version: +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 0 + +attributes #0 = { "amdgpu-num-sgpr"="14" } +attributes #1 = { "amdgpu-num-vgpr"="20" } Index: test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s @@ -0,0 +1,96 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amd_amdgpu_hsa_metadata +// CHECK: amdhsa.kernels: +// CHECK-NEXT: - .max_flat_workgroup_size: 256 +// CHECK-NEXT: .wavefront_size: 128 +// CHECK-NEXT: .symbol: 'test_kernel@kd' +// CHECK-NEXT: .kernarg_segment_size: 8 +// CHECK-NEXT: .private_segment_fixed_size: 32 +// CHECK-NEXT: .name: test_kernel +// CHECK-NEXT: .language: OpenCL C +// CHECK-NEXT: .sgpr_count: 14 +// CHECK-NEXT: .kernarg_segment_align: 64 +// CHECK-NEXT: .vgpr_count: 40 +// CHECK-NEXT: .group_segment_fixed_size: 16 +// CHECK-NEXT: .language_version: +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 0 +// CHECK-NEXT: .args: +// CHECK-NEXT: - .type_name: char +// CHECK-NEXT: .value_kind: by_value +// CHECK-NEXT: .offset: 1 +// CHECK-NEXT: .size: 1 +// CHECK-NEXT: .value_type: i8 +// CHECK-NEXT: - .value_kind: hidden_global_offset_x +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_global_offset_y +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_global_offset_z +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i64 +// CHECK-NEXT: - .value_kind: hidden_printf_buffer +// CHECK-NEXT: .offset: 8 +// CHECK-NEXT: .size: 8 +// CHECK-NEXT: .value_type: i8 +// CHECK-NEXT: .address_space: global +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +// CHECK: amdhsa.printf: +// CHECK-NEXT: - '1:1:4:%d\n' +// CHECK-NEXt: - '2:1:8:%g\n' +// CHECK: .end_amd_amdgpu_hsa_metadata +.amd_amdgpu_hsa_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .language: OpenCL C + .language_version: + - 2 + - 0 + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + .args: + - .type_name: char + .size: 1 + .offset: 1 + .value_kind: by_value + .value_type: i8 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_x + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_y + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_global_offset_z + .value_type: i64 + - .size: 8 + .offset: 8 + .value_kind: hidden_printf_buffer + .value_type: i8 + .address_space: global +.end_amd_amdgpu_hsa_metadata Index: test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s @@ -0,0 +1,67 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amd_amdgpu_hsa_metadata +// CHECK: amdhsa.kernels: +// CHECK: - .max_flat_workgroup_size: 256 +// CHECK: .wavefront_size: 128 +// CHECK: .symbol: 'test_kernel@kd' +// CHECK: .reqd_workgroup_size: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 4 +// CHECK: .kernarg_segment_size: 8 +// CHECK: .private_segment_fixed_size: 32 +// CHECK: .workgroup_size_hint: +// CHECK-NEXT: - 8 +// CHECK-NEXT: - 16 +// CHECK-NEXT: - 32 +// CHECK: .name: test_kernel +// CHECK: .language: OpenCL C +// CHECK: .sgpr_count: 14 +// CHECK: .kernarg_segment_align: 64 +// CHECK: .vgpr_count: 40 +// CHECK: .language_version: +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 0 +// CHECK: .vec_type_hint: int +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +// CHECK: amdhsa.printf: +// CHECK: - '1:1:4:%d\n' +// CHECK: - '2:1:8:%g\n' +// CHECK: .end_amd_amdgpu_hsa_metadata +.amd_amdgpu_hsa_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .language: OpenCL C + .language_version: + - 2 + - 0 + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + .reqd_workgroup_size: + - 1 + - 2 + - 4 + .workgroup_size_hint: + - 8 + - 16 + - 32 + .vec_type_hint: int +.end_amd_amdgpu_hsa_metadata Index: test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s + +// CHECK: .amd_amdgpu_hsa_metadata +// CHECK: amdhsa.kernels: +// CHECK: - .sgpr_count: 40 +// CHECK: .max_flat_workgroup_size: 256 +// CHECK: .symbol: 'test_kernel@kd' +// CHECK: .kernarg_segment_size: 24 +// CHECK: .group_segment_fixed_size: 24 +// CHECK: .private_segment_fixed_size: 16 +// CHECK: .vgpr_count: 14 +// CHECK: .vgpr_spill_count: 1 +// CHECK: .kernarg_segment_align: 16 +// CHECK: .sgpr_spill_count: 1 +// CHECK: .wavefront_size: 64 +// CHECK: .name: test_kernel +// CHECK: amdhsa.version: +// CHECK-NEXT: - 1 +// CHECK-NEXT: - 0 +.amd_amdgpu_hsa_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .kernarg_segment_size: 24 + .group_segment_fixed_size: 24 + .private_segment_fixed_size: 16 + .kernarg_segment_align: 16 + .wavefront_size: 64 + .max_flat_workgroup_size: 256 + .sgpr_count: 40 + .vgpr_count: 14 + .sgpr_spill_count: 1 + .vgpr_spill_count: 1 +.end_amd_amdgpu_hsa_metadata Index: test/MC/AMDGPU/hsa-v3.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/hsa-v3.s @@ -0,0 +1,310 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -mattr=+code-object-v3 -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF + +// ELF: Section { +// ELF: Name: .text +// ELF: Type: SHT_PROGBITS (0x1) +// ELF: Flags [ (0x6) +// ELF: SHF_ALLOC (0x2) +// ELF: SHF_EXECINSTR (0x4) + +// ELF: SHT_NOTE +// ELF: 0000: 04000000 08000000 01000000 414D4400 +// ELF: 0010: 02000000 00000000 04000000 1B000000 +// ELF: 0020: 03000000 414D4400 04000700 07000000 +// ELF: 0030: 00000000 00000000 414D4400 414D4447 +// ELF: 0040: 50550000 +// We can't check binary representation of metadata note: it is different on +// Windows and Linux because of carriage return on Windows + +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_minimal +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_test_all +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } + +.text +// ASM: .text + +.hsa_code_object_version 2,0 +// ASM: .hsa_code_object_version 2,0 + +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" +// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.amd_amdgpu_hsa_metadata + amdhsa.version: + - 3 + - 0 + amdhsa.kernels: + - .name: amd_kernel_code_t_test_all + .symbol: amd_kernel_code_t_test_all@kd + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + - .name: amd_kernel_code_t_minimal + .symbol: amd_kernel_code_t_minimal@kd + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 +.end_amd_amdgpu_hsa_metadata + +// ASM: .amd_amdgpu_hsa_metadata +// ASM: amdhsa.kernels: +// ASM: - .sgpr_count: 14 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .symbol: 'amd_kernel_code_t_test_all@kd' +// ASM: .kernarg_segment_size: 8 +// ASM: .group_segment_fixed_size: 16 +// ASM: .private_segment_fixed_size: 32 +// ASM: .vgpr_count: 40 +// ASM: .kernarg_segment_align: 64 +// ASM: .wavefront_size: 128 +// ASM: .name: amd_kernel_code_t_test_all +// ASM: - .sgpr_count: 14 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .symbol: 'amd_kernel_code_t_minimal@kd' +// ASM: .kernarg_segment_size: 8 +// ASM: .group_segment_fixed_size: 16 +// ASM: .private_segment_fixed_size: 32 +// ASM: .vgpr_count: 40 +// ASM: .kernarg_segment_align: 64 +// ASM: .wavefront_size: 128 +// ASM: .name: amd_kernel_code_t_minimal +// ASM: amdhsa.version: +// ASM-NEXT: - 3 +// ASM-NEXT: - 0 +// ASM: .end_amd_amdgpu_hsa_metadata + +.amdgpu_hsa_kernel amd_kernel_code_t_test_all +.amdgpu_hsa_kernel amd_kernel_code_t_minimal + +amd_kernel_code_t_test_all: +; Test all amd_kernel_code_t members with non-default values. +.amd_kernel_code_t + kernel_code_version_major = 100 + kernel_code_version_minor = 100 + machine_kind = 0 + machine_version_major = 5 + machine_version_minor = 5 + machine_version_stepping = 5 + kernel_code_entry_byte_offset = 512 + kernel_code_prefetch_byte_size = 1 + max_scratch_backing_memory_byte_size = 1 + compute_pgm_rsrc1_vgprs = 1 + compute_pgm_rsrc1_sgprs = 1 + compute_pgm_rsrc1_priority = 1 + compute_pgm_rsrc1_float_mode = 1 + compute_pgm_rsrc1_priv = 1 + compute_pgm_rsrc1_dx10_clamp = 1 + compute_pgm_rsrc1_debug_mode = 1 + compute_pgm_rsrc1_ieee_mode = 1 + compute_pgm_rsrc2_scratch_en = 1 + compute_pgm_rsrc2_user_sgpr = 1 + compute_pgm_rsrc2_tgid_x_en = 1 + compute_pgm_rsrc2_tgid_y_en = 1 + compute_pgm_rsrc2_tgid_z_en = 1 + compute_pgm_rsrc2_tg_size_en = 1 + compute_pgm_rsrc2_tidig_comp_cnt = 1 + compute_pgm_rsrc2_excp_en_msb = 1 + compute_pgm_rsrc2_lds_size = 1 + compute_pgm_rsrc2_excp_en = 1 + enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 1 + enable_sgpr_queue_ptr = 1 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 1 + enable_sgpr_flat_scratch_init = 1 + enable_sgpr_private_segment_size = 1 + enable_sgpr_grid_workgroup_count_x = 1 + enable_sgpr_grid_workgroup_count_y = 1 + enable_sgpr_grid_workgroup_count_z = 1 + enable_ordered_append_gds = 1 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 1 + is_debug_enabled = 1 + is_xnack_enabled = 1 + workitem_private_segment_byte_size = 1 + workgroup_group_segment_byte_size = 1 + gds_segment_byte_size = 1 + kernarg_segment_byte_size = 1 + workgroup_fbarrier_count = 1 + wavefront_sgpr_count = 1 + workitem_vgpr_count = 1 + reserved_vgpr_first = 1 + reserved_vgpr_count = 1 + reserved_sgpr_first = 1 + reserved_sgpr_count = 1 + debug_wavefront_private_segment_offset_sgpr = 1 + debug_private_segment_buffer_sgpr = 1 + kernarg_segment_alignment = 5 + group_segment_alignment = 5 + private_segment_alignment = 5 + wavefront_size = 5 + call_convention = 1 + runtime_loader_kernel_symbol = 1 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 100 +// ASM: amd_code_version_minor = 100 +// ASM: amd_machine_kind = 0 +// ASM: amd_machine_version_major = 5 +// ASM: amd_machine_version_minor = 5 +// ASM: amd_machine_version_stepping = 5 +// ASM: kernel_code_entry_byte_offset = 512 +// ASM: kernel_code_prefetch_byte_size = 1 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 1 +// ASM: float_mode = 1 +// ASM: priv = 1 +// ASM: enable_dx10_clamp = 1 +// ASM: debug_mode = 1 +// ASM: enable_ieee_mode = 1 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 +// ASM: user_sgpr_count = 1 +// ASM: enable_sgpr_workgroup_id_x = 1 +// ASM: enable_sgpr_workgroup_id_y = 1 +// ASM: enable_sgpr_workgroup_id_z = 1 +// ASM: enable_sgpr_workgroup_info = 1 +// ASM: enable_vgpr_workitem_id = 1 +// ASM: enable_exception_msb = 1 +// ASM: granulated_lds_size = 1 +// ASM: enable_exception = 1 +// ASM: enable_sgpr_private_segment_buffer = 1 +// ASM: enable_sgpr_dispatch_ptr = 1 +// ASM: enable_sgpr_queue_ptr = 1 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 1 +// ASM: enable_sgpr_flat_scratch_init = 1 +// ASM: enable_sgpr_private_segment_size = 1 +// ASM: enable_sgpr_grid_workgroup_count_x = 1 +// ASM: enable_sgpr_grid_workgroup_count_y = 1 +// ASM: enable_sgpr_grid_workgroup_count_z = 1 +// ASM: enable_ordered_append_gds = 1 +// ASM: private_element_size = 1 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 1 +// ASM: is_debug_enabled = 1 +// ASM: is_xnack_enabled = 1 +// ASM: workitem_private_segment_byte_size = 1 +// ASM: workgroup_group_segment_byte_size = 1 +// ASM: gds_segment_byte_size = 1 +// ASM: kernarg_segment_byte_size = 1 +// ASM: workgroup_fbarrier_count = 1 +// ASM: wavefront_sgpr_count = 1 +// ASM: workitem_vgpr_count = 1 +// ASM: reserved_vgpr_first = 1 +// ASM: reserved_vgpr_count = 1 +// ASM: reserved_sgpr_first = 1 +// ASM: reserved_sgpr_count = 1 +// ASM: debug_wavefront_private_segment_offset_sgpr = 1 +// ASM: debug_private_segment_buffer_sgpr = 1 +// ASM: kernarg_segment_alignment = 5 +// ASM: group_segment_alignment = 5 +// ASM: private_segment_alignment = 5 +// ASM: wavefront_size = 5 +// ASM: call_convention = 1 +// ASM: runtime_loader_kernel_symbol = 1 +// ASM: .end_amd_kernel_code_t + +amd_kernel_code_t_minimal: +.amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + granulated_workitem_vgpr_count = 1 + granulated_wavefront_sgpr_count = 1 + user_sgpr_count = 2 + kernarg_segment_byte_size = 16 + wavefront_sgpr_count = 8 +// wavefront_sgpr_count = 7 +; wavefront_sgpr_count = 7 +// Make sure a blank line won't break anything: + +// Make sure a line with whitespace won't break anything: + + workitem_vgpr_count = 16 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 1 +// ASM: amd_code_version_minor = 2 +// ASM: amd_machine_kind = 1 +// ASM: amd_machine_version_major = 7 +// ASM: amd_machine_version_minor = 0 +// ASM: amd_machine_version_stepping = 0 +// ASM: kernel_code_entry_byte_offset = 256 +// ASM: kernel_code_prefetch_byte_size = 0 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 0 +// ASM: float_mode = 0 +// ASM: priv = 0 +// ASM: enable_dx10_clamp = 0 +// ASM: debug_mode = 0 +// ASM: enable_ieee_mode = 0 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 +// ASM: user_sgpr_count = 2 +// ASM: enable_sgpr_workgroup_id_x = 0 +// ASM: enable_sgpr_workgroup_id_y = 0 +// ASM: enable_sgpr_workgroup_id_z = 0 +// ASM: enable_sgpr_workgroup_info = 0 +// ASM: enable_vgpr_workitem_id = 0 +// ASM: enable_exception_msb = 0 +// ASM: granulated_lds_size = 0 +// ASM: enable_exception = 0 +// ASM: enable_sgpr_private_segment_buffer = 0 +// ASM: enable_sgpr_dispatch_ptr = 0 +// ASM: enable_sgpr_queue_ptr = 0 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 0 +// ASM: enable_sgpr_flat_scratch_init = 0 +// ASM: enable_sgpr_private_segment_size = 0 +// ASM: enable_sgpr_grid_workgroup_count_x = 0 +// ASM: enable_sgpr_grid_workgroup_count_y = 0 +// ASM: enable_sgpr_grid_workgroup_count_z = 0 +// ASM: enable_ordered_append_gds = 0 +// ASM: private_element_size = 0 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 0 +// ASM: is_debug_enabled = 0 +// ASM: is_xnack_enabled = 0 +// ASM: workitem_private_segment_byte_size = 0 +// ASM: workgroup_group_segment_byte_size = 0 +// ASM: gds_segment_byte_size = 0 +// ASM: kernarg_segment_byte_size = 16 +// ASM: workgroup_fbarrier_count = 0 +// ASM: wavefront_sgpr_count = 8 +// ASM: workitem_vgpr_count = 16 +// ASM: reserved_vgpr_first = 0 +// ASM: reserved_vgpr_count = 0 +// ASM: reserved_sgpr_first = 0 +// ASM: reserved_sgpr_count = 0 +// ASM: debug_wavefront_private_segment_offset_sgpr = 0 +// ASM: debug_private_segment_buffer_sgpr = 0 +// ASM: kernarg_segment_alignment = 4 +// ASM: group_segment_alignment = 4 +// ASM: private_segment_alignment = 4 +// ASM: wavefront_size = 6 +// ASM: call_convention = -1 +// ASM: runtime_loader_kernel_symbol = 0 +// ASM: .end_amd_kernel_code_t Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -28,7 +28,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" @@ -3466,7 +3468,7 @@ return OS.str(); } -static std::string getAMDGPUNoteTypeName(const uint32_t NT) { +static std::string getAMDNoteTypeName(const uint32_t NT) { static const struct { uint32_t ID; const char *Name; @@ -3489,6 +3491,16 @@ return OS.str(); } +static std::string getAMDGPUNoteTypeName(const uint32_t NT) { + if (NT == ELF::NT_AMDGPU_METADATA) + return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)"); + + std::string string; + raw_string_ostream OS(string); + OS << format("Unknown note type (0x%08x)", NT); + return OS.str(); +} + template static void printGNUProperty(raw_ostream &OS, uint32_t Type, uint32_t DataSize, ArrayRef Data) { @@ -3609,8 +3621,8 @@ } template -static void printAMDGPUNote(raw_ostream &OS, uint32_t NoteType, - ArrayRef Words, size_t Size) { +static void printAMDNote(raw_ostream &OS, uint32_t NoteType, + ArrayRef Words, size_t Size) { switch (NoteType) { default: return; @@ -3640,6 +3652,32 @@ OS.flush(); } +template +static void printAMDGPUNote(raw_ostream &OS, uint32_t NoteType, + ArrayRef Words, size_t Size) { + OS << " AMDGPU Metadata:\n"; + auto MsgPackString = StringRef(reinterpret_cast(Words.data()), Size); + msgpack::Reader MsgPackReader(MsgPackString); + auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader); + if (auto Err = OptMsgPackNodeOrErr.takeError()) + return error(std::move(Err)); + auto &OptMsgPackNode = *OptMsgPackNodeOrErr; + if (!OptMsgPackNode) + return; + auto &MsgPackNode = *OptMsgPackNode; + + AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); + if (!Verifier.verify(MsgPackNode)) + reportError("invalid AMDGPU metadata"); + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << MsgPackNode; + OS << StrOS.str(); + OS.flush(); +} + template void GNUStyle::printNotes(const ELFFile *Obj) { const Elf_Ehdr *e = Obj->getHeader(); @@ -3666,6 +3704,9 @@ } else if (Name == "FreeBSD") { OS << getFreeBSDNoteTypeName(Type) << '\n'; } else if (Name == "AMD") { + OS << getAMDNoteTypeName(Type) << '\n'; + printAMDNote(OS, Type, Descriptor, Descriptor.size()); + } else if (Name == "AMDGPU") { OS << getAMDGPUNoteTypeName(Type) << '\n'; printAMDGPUNote(OS, Type, Descriptor, Descriptor.size()); } else {