Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -127,6 +127,10 @@ unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; + void emitStartOfRuntimeMetadata(const Module &M); + + void emitRuntimeMetadata(const Function &F); + protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -39,7 +39,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "AMDGPURuntimeMetadata.h" +using namespace ::AMDGPU; using namespace llvm; // TODO: This should get the default rounding mode from the kernel. We just set @@ -111,6 +113,7 @@ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + emitStartOfRuntimeMetadata(M); } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { @@ -244,6 +247,8 @@ } } + emitRuntimeMetadata(*MF.getFunction()); + return false; } @@ -740,3 +745,227 @@ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); return false; } + +// Emit a key and an integer value for runtime metadata. +static void emitRuntimeMDIntValue(std::unique_ptr &Streamer, + RuntimeMD::Key K, uint64_t V, + unsigned Size) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(V, Size); +} + +// Emit a key and a string value for runtime metadata. +static void emitRuntimeMDStringValue(std::unique_ptr &Streamer, + RuntimeMD::Key K, StringRef S) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(S.size(), 4); + Streamer->EmitBytes(S); +} + +// Emit a key and three integer values for runtime metadata. +// The three integer values are obtained from MDNode \p Node; +static void emitRuntimeMDThreeIntValues(std::unique_ptr &Streamer, + RuntimeMD::Key K, MDNode *Node, + unsigned Size) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(mdconst::extract( + Node->getOperand(0))->getZExtValue(), Size); + Streamer->EmitIntValue(mdconst::extract( + Node->getOperand(1))->getZExtValue(), Size); + Streamer->EmitIntValue(mdconst::extract( + Node->getOperand(2))->getZExtValue(), Size); +} + +void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { + OutStreamer->SwitchSection(getObjFileLowering().getContext() + .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); + + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion, + RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); + if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage, + RuntimeMD::OpenCL_C, 1); + auto Node = MD->getOperand(0); + unsigned short Major = mdconst::extract(Node->getOperand(0)) + ->getZExtValue(); + unsigned short Minor = mdconst::extract(Node->getOperand(1)) + ->getZExtValue(); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion, + Major * 100 + Minor * 10, 2); + } +} + +static Twine getOCLTypeName(Type *Ty, bool isSigned) { + if (VectorType* VecTy = dyn_cast(Ty)) { + Type* EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getVectorNumElements(); + return getOCLTypeName(EleTy, isSigned) + Twine(Size); + } + switch (Ty->getTypeID()) { + case Type::HalfTyID: return "half"; + case Type::FloatTyID: return "float"; + case Type::DoubleTyID: return "double"; + case Type::IntegerTyID: { + if (!isSigned) + return Twine('u') + getOCLTypeName(Ty, true); + auto IntTy = cast(Ty); + auto BW = IntTy->getIntegerBitWidth(); + switch (BW) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return Twine("i") + Twine(BW); + } + } + default: + llvm_unreachable("invalid type"); + } +} + +static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( + Type *Ty, StringRef TypeName) { + if (auto VT = dyn_cast(Ty)) + return getRuntimeMDValueType(VT->getElementType(), TypeName); + else if (auto PT = dyn_cast(Ty)) + return getRuntimeMDValueType(PT->getElementType(), TypeName); + else if (Ty->isHalfTy()) + return RuntimeMD::KernelArg::F16; + else if (Ty->isFloatTy()) + return RuntimeMD::KernelArg::F32; + else if (Ty->isDoubleTy()) + return RuntimeMD::KernelArg::F64; + else if (IntegerType* intTy = dyn_cast(Ty)) { + bool Signed = !TypeName.startswith("u"); + switch (intTy->getIntegerBitWidth()) { + case 8: + return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; + case 16: + return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; + case 32: + return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; + case 64: + return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; + default: + // Runtime does not recognize other integer types. Report as + // struct type. + return RuntimeMD::KernelArg::Struct; + } + } else + return RuntimeMD::KernelArg::Struct; +} + +void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { + if (!F.getMetadata("kernel_arg_type")) + return; + + MCContext &Context = getObjFileLowering().getContext(); + OutStreamer->SwitchSection( + Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); + OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName()); + + for (auto &Arg:F.args()) { + // Emit KeyArgBegin. + unsigned I = Arg.getArgNo(); + OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1); + + // Emit KeyArgSize and KeyArgAlign. + auto T = Arg.getType(); + auto DL = F.getParent()->getDataLayout(); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, + DL.getTypeAllocSize(T), 4); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, + DL.getABITypeAlignment(T), 4); + + // Emit KeyArgTypeName. + auto TypeName = dyn_cast(F.getMetadata( + "kernel_arg_type")->getOperand(I))->getString(); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); + + // Emit KeyArgName. + if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) { + auto ArgName = cast(ArgNameMD->getOperand( + I))->getString(); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); + } + + // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. + auto TypeQual = cast(F.getMetadata( + "kernel_arg_type_qual")->getOperand(I))->getString(); + SmallVector SplitQ; + TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/); + for (auto &I:SplitQ) { + auto Key = StringSwitch(I) + .Case("volatile", RuntimeMD::KeyArgIsVolatile) + .Case("restrict", RuntimeMD::KeyArgIsRestrict) + .Case("const", RuntimeMD::KeyArgIsConst) + .Case("pipe", RuntimeMD::KeyArgIsPipe) + .Default(RuntimeMD::KeyNull); + OutStreamer->EmitIntValue(Key, 1); + } + + // Emit KeyArgTypeKind. + auto BaseTypeName = cast( + F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString(); + auto TypeKind = StringSwitch(BaseTypeName) + .Case("sampler_t", RuntimeMD::KernelArg::Sampler) + .Case("queue_t", RuntimeMD::KernelArg::Queue) + .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", + "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_depth_t", "image2d_array_depth_t", + "image2d_msaa_t", "image2d_array_msaa_t", + "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_array_msaa_depth_t", "image3d_t", + RuntimeMD::KernelArg::Image) + .Default(isa(T) ? RuntimeMD::KernelArg::Pointer : + RuntimeMD::KernelArg::Value); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1); + + // Emit KeyArgValueType. + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, + getRuntimeMDValueType(T, BaseTypeName), 2); + + // Emit KeyArgAccQual. + auto AccQual = cast(F.getMetadata( + "kernel_arg_access_qual")->getOperand(I))->getString(); + auto AQ = StringSwitch(AccQual) + .Case("read_only", RuntimeMD::KernelArg::ReadOnly) + .Case("write_only", RuntimeMD::KernelArg::WriteOnly) + .Case("read_write", RuntimeMD::KernelArg::ReadWrite) + .Default(RuntimeMD::KernelArg::None); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, + AQ, 1); + + // Emit KeyArgAddrQual. + if (isa(T)) + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, + T->getPointerAddressSpace(), 1); + + // Emit KeyArgEnd + OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1); + } + + // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. + if (auto RWGS = F.getMetadata("reqd_work_group_size")) + emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize, + RWGS, 4); + if (auto WGSH = F.getMetadata("work_group_size_hint")) + emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint, + WGSH, 4); + if (auto VTH = F.getMetadata("vec_type_hint")) { + auto TypeName = getOCLTypeName(cast( + VTH->getOperand(0))->getType(), mdconst::extract( + VTH->getOperand(1))->getZExtValue()).str(); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint, + TypeName); + } + + // Emit KeyKernelEnd + OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1); +} Index: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -0,0 +1,138 @@ +//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// Enums and structure types used by runtime metadata. +/// +/// Runtime requests certain information (metadata) about kernels to be able +/// to execute the kernels and answer the queries about the kernels. +/// The metadata is represented as a byte stream in an ELF section of a +/// binary (code object). The byte stream consists of key-value pairs. +/// Each key is an 8 bit unsigned integer. Each value can be an integer, +/// a string, or a stream of key-value pairs. There are 3 levels of key-value +/// pair streams. At the beginning of the ELF section is the top level +/// key-value pair stream. A kernel-level key-value pair stream starts after +/// encountering KeyKernelBegin and ends immediately before encountering +/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts +/// after encountering KeyArgBegin and ends immediately before encountering +/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top +/// level key-value pair stream. A kernel-argument-level key-value pair stream +/// can only appear in a kernel-level key-value pair stream. +/// +/// The format should be kept backward compatible. New enum values and bit +/// fields should be appended at the end. It is suggested to bump up the +/// revision number whenever the format changes and document the change +/// in the revision in this header. +/// +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H + +#include + +namespace AMDGPU { + +namespace RuntimeMD { + + // Version and revision of runtime metadata + const unsigned char MDVersion = 1; + const unsigned char MDRevision = 0; + + // ELF section name containing runtime metadata + const char SectionName[] = ".AMDGPU.runtime_metadata"; + + // Enumeration values of keys in runtime metadata. + enum Key { + KeyNull = 0, // Place holder. Ignored when encountered + KeyMDVersion = 1, // Runtime metadata version + KeyLanguage = 2, // Language + KeyLanguageVersion = 3, // Language version + KeyKernelBegin = 4, // Beginning of kernel-level stream + KeyKernelEnd = 5, // End of kernel-level stream + KeyKernelName = 6, // Kernel name + KeyArgBegin = 7, // Beginning of kernel-arg-level stream + KeyArgEnd = 8, // End of kernel-arg-level stream + KeyArgSize = 9, // Kernel arg size + KeyArgAlign = 10, // Kernel arg alignment + KeyArgTypeName = 11, // Kernel type name + KeyArgName = 12, // Kernel name + KeyArgTypeKind = 13, // Kernel argument type kind + KeyArgValueType = 14, // Kernel argument value type + KeyArgAddrQual = 15, // Kernel argument address qualifier + KeyArgAccQual = 16, // Kernel argument access qualifier + KeyArgIsConst = 17, // Kernel argument is const qualified + KeyArgIsRestrict = 18, // Kernel argument is restrict qualified + KeyArgIsVolatile = 19, // Kernel argument is volatile qualified + KeyArgIsPipe = 20, // Kernel argument is pipe qualified + KeyReqdWorkGroupSize = 21, // Required work group size + KeyWorkGroupSizeHint = 22, // Work group size hint + KeyVecTypeHint = 23, // Vector type hint + KeyKernelIndex = 24, // Kernel index for device enqueue + KeySGPRs = 25, // Number of SGPRs + KeyVGPRs = 26, // Number of VGPRs + KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD + KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD + KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits + KeyMaxWorkGroupSize = 30, // Maximum work group size + KeyNoPartialWorkGroups = 31, // No partial work groups + }; + + enum Language : uint8_t { + OpenCL_C = 0, + HCC = 1, + OpenMP = 2, + OpenCL_CPP = 3, +}; + + enum LanguageVersion : uint16_t { + V100 = 100, + V110 = 110, + V120 = 120, + V200 = 200, + V210 = 210, + }; + + namespace KernelArg { + enum TypeKind : uint8_t { + Value = 0, + Pointer = 1, + Image = 2, + Sampler = 3, + Queue = 4, + }; + + enum ValueType : uint16_t { + Struct = 0, + I8 = 1, + U8 = 2, + I16 = 3, + U16 = 4, + F16 = 5, + I32 = 6, + U32 = 7, + F32 = 8, + I64 = 9, + U64 = 10, + F64 = 11, + }; + + enum AccessQualifer : uint8_t { + None = 0, + ReadOnly = 1, + WriteOnly = 2, + ReadWrite = 3, + }; + } // namespace KernelArg +} // namespace RuntimeMD +} // namespace AMDGPU + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H Index: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll +++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll @@ -0,0 +1,848 @@ +; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s + +%struct.A = type { i8, float } +%opencl.image1d_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque +%opencl.queue_t = type opaque +%opencl.pipe_t = type opaque +%struct.B = type { i32 addrspace(1)*} +%opencl.clk_event_t = type opaque + +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 256 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .short 200 + +; CHECK-LABEL:{{^}}test_char: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "test_char" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .ascii "char" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_ushort2: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .ascii "test_ushort2" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "ushort2" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_int3: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "test_int3" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .ascii "int3" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_ulong4: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 11 +; CHECK-NEXT: .ascii "test_ulong4" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 32 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 32 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .ascii "ulong4" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 10 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_half8: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .ascii "test_half8" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "half8" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 5 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_float16: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .ascii "test_float16" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 64 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 64 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "float16" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_double16: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 13 +; CHECK-NEXT: .ascii "test_double16" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 128 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 128 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .ascii "double16" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 11 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_pointer: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .ascii "test_pointer" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_image: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .ascii "test_image" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "image2d_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_sampler: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .ascii "test_sampler" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "sampler_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_queue: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .ascii "test_queue" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "queue_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_struct: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 11 +; CHECK-NEXT: .ascii "test_struct" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .ascii "struct A" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_i128: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "test_i128" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .ascii "i128" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_multi_arg: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 14 +; CHECK-NEXT: .ascii "test_multi_arg" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .ascii "int" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .ascii "short2" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 3 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "char3" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK-LABEL:{{^}}test_addr_space: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 15 +; CHECK-NEXT: .ascii "test_addr_space" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK-LABEL:{{^}}test_type_qual: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 14 +; CHECK-NEXT: .ascii "test_type_qual" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 19 +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 17 +; CHECK-NEXT: .byte 18 +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "int *" +; CHECK-NEXT: .byte 20 +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { + ret void +} + +; CHECK-LABEL:{{^}}test_access_qual: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .ascii "test_access_qual" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "image1d_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "image2d_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .ascii "image3d_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { + ret void +} + +; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 27 +; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .ascii "int" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 21 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 23 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .ascii "int" +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 { + ret void +} + +; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 27 +; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .ascii "int" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 22 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long 32 +; CHECK-NEXT: .byte 23 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .ascii "uint4" +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 { + ret void +} + +; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 19 +; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .ascii "int **" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 28 +; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .ascii "struct B" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_arg_vector_of_ptr: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 22 +; CHECK-NEXT: .ascii "test_arg_vector_of_ptr" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 47 +; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { + ret void +} + +; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 29 +; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 11 +; CHECK-NEXT: .ascii "clk_event_t" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { + ret void +} + +!1 = !{i32 0} +!2 = !{!"none"} +!3 = !{!"int"} +!4 = !{!""} +!5 = !{i32 undef, i32 1} +!6 = !{i32 1, i32 2, i32 4} +!7 = !{<4 x i32> undef, i32 0} +!8 = !{i32 8, i32 16, i32 32} +!9 = !{!"char"} +!10 = !{!"ushort2"} +!11 = !{!"int3"} +!12 = !{!"ulong4"} +!13 = !{!"half8"} +!14 = !{!"float16"} +!15 = !{!"double16"} +!16 = !{!"int *"} +!17 = !{!"image2d_t"} +!18 = !{!"sampler_t"} +!19 = !{!"queue_t"} +!20 = !{!"struct A"} +!21 = !{!"i128"} +!22 = !{i32 0, i32 0, i32 0} +!23 = !{!"none", !"none", !"none"} +!24 = !{!"int", !"short2", !"char3"} +!25 = !{!"", !"", !""} +!50 = !{i32 1, i32 2, i32 3} +!51 = !{!"int *", !"int *", !"int *"} +!60 = !{i32 1, i32 1, i32 1} +!61 = !{!"read_only", !"write_only", !"read_write"} +!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} +!70 = !{!"volatile", !"const restrict", !"pipe"} +!80 = !{!"int **"} +!81 = !{i32 1} +!82 = !{!"struct B"} +!83 = !{!"global int* __attribute__((ext_vector_type(2)))"} +!84 = !{!"clk_event_t"} +!opencl.ocl.version = !{!90} +!90 = !{i32 2, i32 0}