Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -117,6 +117,12 @@
                        unsigned AsmVariant, const char *ExtraCode,
                        raw_ostream &O) override;
 
+  /// Emits the number of kernels in \p M at the start of the OpenCL metadata
+  /// section.
+  void emitStartOfOpenCLMetadata(const Module &M);
+
+  /// Emits the OpenCL metadata record of \p F into the OpenCL metadata section.
+  void emitOpenCLMetadata(const Function &F);
+
 protected:
   std::vector<std::string> DisasmLines, HexLines;
   size_t DisasmLineMaxLen;
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
 //
 
 #include "AMDGPUAsmPrinter.h"
+#include "AMDGPUOpenCLMetadata.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "InstPrinter/AMDGPUInstPrinter.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -40,8 +41,13 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
+using namespace ::AMDGPU;
 using namespace llvm;
 
+namespace {
+  const char OpenCLMetadataSectionName[] = ".OpenCL.Metadata";
+}
+
 // TODO: This should get the default rounding mode from the kernel. We just set
 // the default here, but this could change if the OpenCL rounding mode pragmas
 // are used.
@@ -111,6 +117,7 @@
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
                                     "AMD", "AMDGPU");
+  emitStartOfOpenCLMetadata(M);
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
@@ -237,6 +244,8 @@
     }
   }
 
+  emitOpenCLMetadata(*MF.getFunction());
+
   return false;
 }
 
@@ -711,3 +720,248 @@
       *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
   return false;
 }
+
+/// Switches to the OpenCL metadata section and emits, as a 4-byte integer,
+/// the number of functions in \p M that have "kernel_arg_type" metadata.
+void AMDGPUAsmPrinter::emitStartOfOpenCLMetadata(const Module &M) {
+  unsigned NumKernels = 0;
+  for (const Function &F : M.functions())
+    if (F.getMetadata("kernel_arg_type"))
+      ++NumKernels;
+
+  MCContext &Context = getObjFileLowering().getContext();
+  OutStreamer->SwitchSection(
+      Context.getELFSection(OpenCLMetadataSectionName, ELF::SHT_PROGBITS, 0));
+  OutStreamer->EmitIntValue(NumKernels, 4);
+}
+
+/// Returns the OpenCL C name of \p Ty ("float", "uint4", ...). Integer types
+/// are named as signed or unsigned according to \p isSigned.
+///
+/// The name is returned as an owning std::string because a Twine only refers
+/// to its operands and must not outlive the temporaries it was built from.
+static std::string getOCLTypeName(Type *Ty, bool isSigned) {
+  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
+    Type *EleTy = VecTy->getElementType();
+    unsigned Size = VecTy->getVectorNumElements();
+    return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str();
+  }
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:
+    return "half";
+  case Type::FloatTyID:
+    return "float";
+  case Type::DoubleTyID:
+    return "double";
+  case Type::IntegerTyID: {
+    if (!isSigned)
+      return (Twine("u") + getOCLTypeName(Ty, true)).str();
+    unsigned BW = Ty->getIntegerBitWidth();
+    switch (BW) {
+    case 8:
+      return "char";
+    case 16:
+      return "short";
+    case 32:
+      return "int";
+    case 64:
+      return "long";
+    default:
+      return (Twine("i") + Twine(BW)).str();
+    }
+  }
+  default:
+    llvm_unreachable("invalid type");
+  }
+}
+
+/// Appends the metadata record of kernel \p F to the OpenCL metadata section.
+/// Does nothing when \p F has no "kernel_arg_type" metadata.
+///
+/// Every integer below is emitted as 4 bytes:
+///   - the number of parameters of \p F;
+///   - per argument: the KernelArg::Flag word, the alloc size, the ABI
+///     alignment, the "kernel_arg_type" string (length, then bytes) and,
+///     only if \p F has "kernel_arg_name" metadata, the argument name
+///     (length, then bytes);
+///   - the Kernel::Flag word;
+///   - the three reqd_work_group_size operands, if present;
+///   - the three work_group_size_hint operands, if present;
+///   - the vec_type_hint type name (length, then bytes), if present.
+void AMDGPUAsmPrinter::emitOpenCLMetadata(const Function &F) {
+  if (!F.getMetadata("kernel_arg_type"))
+    return;
+
+  MCContext &Context = getObjFileLowering().getContext();
+  OutStreamer->SwitchSection(
+      Context.getELFSection(OpenCLMetadataSectionName, ELF::SHT_PROGBITS, 0));
+  OutStreamer->EmitIntValue(F.getFunctionType()->getNumParams(), 4);
+
+  for (auto &Arg : F.args()) {
+    unsigned I = Arg.getArgNo();
+
+    struct KernelArgFlag : public KernelArg::Flag {
+      void setTypeKind(Type *T, StringRef TypeName) {
+        if (TypeName == "sampler_t")
+          TypeKind = (unsigned)KernelArg::Sampler;
+        else if (TypeName == "queue_t")
+          TypeKind = (unsigned)KernelArg::Queue;
+        else if (TypeName == "image1d_t" ||
+                 TypeName == "image1d_array_t" ||
+                 TypeName == "image1d_buffer_t" ||
+                 TypeName == "image2d_t" ||
+                 TypeName == "image2d_array_t" ||
+                 TypeName == "image2d_depth_t" ||
+                 TypeName == "image2d_array_depth_t" ||
+                 TypeName == "image2d_msaa_t" ||
+                 TypeName == "image2d_array_msaa_t" ||
+                 TypeName == "image2d_msaa_depth_t" ||
+                 TypeName == "image2d_array_msaa_depth_t" ||
+                 TypeName == "image3d_t")
+          TypeKind = (unsigned)KernelArg::Image;
+        else if (isa<PointerType>(T))
+          TypeKind = (unsigned)KernelArg::Pointer;
+        else
+          TypeKind = (unsigned)KernelArg::Value;
+      }
+
+      // Types without a KernelArg::DataType value (vectors, pointers,
+      // integers that are not 8/16/32/64 bits wide) leave DataType at the
+      // zero stored by the constructor.
+      void setDataType(Type *Ty, StringRef TypeName) {
+        if (isa<StructType>(Ty))
+          DataType = (unsigned)KernelArg::Struct;
+        else if (Ty->isHalfTy())
+          DataType = (unsigned)KernelArg::F16;
+        else if (Ty->isFloatTy())
+          DataType = (unsigned)KernelArg::F32;
+        else if (Ty->isDoubleTy())
+          DataType = (unsigned)KernelArg::F64;
+        else if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) {
+          bool Signed = !TypeName.startswith("u");
+          switch (IntTy->getIntegerBitWidth()) {
+          case 8:
+            DataType = (unsigned)(Signed ? KernelArg::I8 : KernelArg::U8);
+            break;
+          case 16:
+            DataType = (unsigned)(Signed ? KernelArg::I16 : KernelArg::U16);
+            break;
+          case 32:
+            DataType = (unsigned)(Signed ? KernelArg::I32 : KernelArg::U32);
+            break;
+          case 64:
+            DataType = (unsigned)(Signed ? KernelArg::I64 : KernelArg::U64);
+            break;
+          default:
+            // Other widths do occur (see the i128 kernel in
+            // kernel-attributes.ll), so this must not be llvm_unreachable.
+            break;
+          }
+        }
+      }
+
+      void setTypeQualifier(StringRef Q) {
+        SmallVector<StringRef, 1> SplitQ;
+        Q.split(SplitQ, " ", -1, false /* drop empty entry */);
+        TypeQual = 0;
+        for (auto &I : SplitQ) {
+          TypeQual |= StringSwitch<unsigned>(I)
+            .Case("volatile", KernelArg::Volatile)
+            .Case("restrict", KernelArg::Restrict)
+            .Case("const", KernelArg::Const)
+            .Case("pipe", KernelArg::Pipe)
+            .Default(0);
+        }
+      }
+
+      void setAccessQualifier(StringRef Q) {
+        AccQual = StringSwitch<unsigned>(Q)
+          .Case("read_only", KernelArg::ReadOnly)
+          .Case("write_only", KernelArg::WriteOnly)
+          .Case("read_write", KernelArg::ReadWrite)
+          .Default(KernelArg::None);
+      }
+
+      // No translation is needed for address space. AddrQual is two bits
+      // wide, so only address spaces 0-3 are representable.
+      void setAddressQualifier(unsigned A) {
+        AddrQual = A;
+      }
+
+      void setHasName(bool B) {
+        HasName = B;
+      }
+
+      // Initialize Flag for the \p I-th argument of function \p F. The base
+      // is value-initialized so that every bit-field starts at zero; a field
+      // that no setter assigns would otherwise be read uninitialized.
+      KernelArgFlag(const Function &F, unsigned I) : KernelArg::Flag() {
+        auto T = F.getFunctionType()->getParamType(I);
+        auto BaseTypeName = cast<MDString>(
+          F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
+        auto TypeQualStr = cast<MDString>(F.getMetadata(
+          "kernel_arg_type_qual")->getOperand(I))->getString();
+        auto AccQualStr = cast<MDString>(F.getMetadata(
+          "kernel_arg_access_qual")->getOperand(I))->getString();
+        setTypeKind(T, BaseTypeName);
+        setDataType(T, BaseTypeName);
+        setTypeQualifier(TypeQualStr);
+        setAccessQualifier(AccQualStr);
+        setAddressQualifier(isa<PointerType>(T) ?
+                            T->getPointerAddressSpace() : 0);
+        setHasName(F.getMetadata("kernel_arg_name"));
+      }
+    } Flag(F, I);
+
+    OutStreamer->EmitIntValue(Flag.getAsUnsignedInt(), 4);
+
+    Type *T = Arg.getType();
+    // Bind by reference: a plain 'auto' would copy the DataLayout per argument.
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    OutStreamer->EmitIntValue(DL.getTypeAllocSize(T), 4);
+    OutStreamer->EmitIntValue(DL.getABITypeAlignment(T), 4);
+
+    // cast<>, not dyn_cast<>: the result is dereferenced unconditionally.
+    StringRef TypeName = cast<MDString>(
+        F.getMetadata("kernel_arg_type")->getOperand(I))->getString();
+    OutStreamer->EmitIntValue(TypeName.size(), 4);
+    OutStreamer->EmitBytes(TypeName);
+
+    if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
+      StringRef ArgName =
+          cast<MDString>(ArgNameMD->getOperand(I))->getString();
+      OutStreamer->EmitIntValue(ArgName.size(), 4);
+      OutStreamer->EmitBytes(ArgName);
+    }
+  }
+
+  auto RWGS = F.getMetadata("reqd_work_group_size");
+  auto WGSH = F.getMetadata("work_group_size_hint");
+  auto VTH = F.getMetadata("vec_type_hint");
+  ::AMDGPU::Kernel::Flag Flag{};
+
+  Flag.HasReqdWorkGroupSize = RWGS != nullptr;
+  Flag.HasWorkGroupSizeHint = WGSH != nullptr;
+  Flag.HasVecTypeHint = VTH != nullptr;
+  OutStreamer->EmitIntValue(Flag.getAsUnsignedInt(), 4);
+
+  // Emits the first three operands of \p Node, each a constant integer.
+  auto EmitXYZ = [this](const MDNode *Node) {
+    for (unsigned Dim = 0; Dim < 3; ++Dim) {
+      auto *C = mdconst::extract<ConstantInt>(Node->getOperand(Dim));
+      OutStreamer->EmitIntValue(C->getZExtValue(), 4);
+    }
+  };
+  if (RWGS)
+    EmitXYZ(RWGS);
+  if (WGSH)
+    EmitXYZ(WGSH);
+  if (VTH) {
+    // Operand 0 supplies the hinted type, operand 1 its signedness.
+    std::string TypeName = getOCLTypeName(
+        cast<ValueAsMetadata>(VTH->getOperand(0))->getType(),
+        mdconst::extract<ConstantInt>(VTH->getOperand(1))->getZExtValue());
+    OutStreamer->EmitIntValue(TypeName.size(), 4);
+    OutStreamer->EmitBytes(TypeName);
+  }
+}
Index: lib/Target/AMDGPU/AMDGPUOpenCLMetadata.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUOpenCLMetadata.h
@@ -0,0 +1,111 @@
+//===-- AMDGPUOpenCLMetadata.h - AMDGPU OpenCL Metadata Header File -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Enums and structure types used by OpenCL metadata.
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUOPENCLMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUOPENCLMETADATA_H
+
+#include <cstdint>
+#include <string>
+
+namespace AMDGPU {
+
+  namespace KernelArg {
+    /// Kind of a kernel argument; stored in Flag::TypeKind.
+    enum Type : char {
+      Pointer = 0,
+      Value = 1,
+      Image = 2,
+      Sampler = 3,
+      Queue = 4,
+    };
+
+    /// Scalar data type of a kernel argument; stored in Flag::DataType.
+    enum DataType : char {
+      Struct = 0,
+      I8 = 1,
+      U8 = 2,
+      I16 = 3,
+      U16 = 4,
+      F16 = 5,
+      I32 = 6,
+      U32 = 7,
+      F32 = 8,
+      I64 = 9,
+      U64 = 10,
+      F64 = 11,
+    };
+
+    /// Type qualifier bits; OR-ed together into Flag::TypeQual.
+    enum TypeQualifier : char {
+      Volatile = 1,
+      Restrict = 2,
+      Pipe = 4,
+      Const = 8,
+    };
+
+    /// Access qualifier of a kernel argument; stored in Flag::AccQual.
+    enum AccessQualifier : char {
+      ReadOnly = 0,
+      WriteOnly = 1,
+      ReadWrite = 2,
+      None = 3,
+    };
+
+    /// Original (misspelled) name of AccessQualifier, kept as an alias.
+    typedef AccessQualifier AccessQualifer;
+
+    /// Per-argument attributes, packed into one word by getAsUnsignedInt().
+    struct Flag {
+      unsigned TypeKind : 3; // KernelArg::Type value
+      unsigned DataType : 4; // KernelArg::DataType value
+      unsigned HasName  : 1; // Whether the argument has name
+      unsigned TypeQual : 4; // Type qualifier
+      unsigned AccQual  : 2; // Access qualifier
+      unsigned AddrQual : 2; // Address qualifier
+
+      /// Returns the fields packed from bit 0 upwards in declaration order:
+      /// TypeKind [0,2], DataType [3,6], HasName [7], TypeQual [8,11],
+      /// AccQual [12,13], AddrQual [14,15].
+      unsigned getAsUnsignedInt() const {
+        return TypeKind
+          | DataType << 3
+          | HasName << 7
+          | TypeQual << 8
+          | AccQual << 12
+          | AddrQual << 14;
+      }
+    };
+  } // namespace KernelArg
+
+  namespace Kernel {
+    /// Per-kernel attributes, packed into one word by getAsUnsignedInt().
+    struct Flag {
+      unsigned HasReqdWorkGroupSize : 1; // Has reqd_work_group_size attribute
+      unsigned HasWorkGroupSizeHint : 1; // Has work_group_size_hint attribute
+      unsigned HasVecTypeHint : 1; // Has vec_type_hint attribute
+      unsigned IsDevEnqKernel : 1; // Is device enqueue kernel
+
+      /// Returns the four bits packed in declaration order, starting at bit 0.
+      unsigned getAsUnsignedInt() const {
+        return HasReqdWorkGroupSize
+          | HasWorkGroupSizeHint << 1
+          | HasVecTypeHint << 2
+          | IsDevEnqKernel << 3;
+      }
+    };
+  } // namespace Kernel
+} // namespace AMDGPU
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUOPENCLMETADATA_H
Index: test/CodeGen/AMDGPU/kernel-attributes.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/kernel-attributes.ll
@@ -0,0 +1,359 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+
+%struct.A = type { i8, float }
+%opencl.image1d_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+%opencl.queue_t = type opaque
+%opencl.pipe_t = type opaque
+
+; CHECK: .section .OpenCL.Metadata
+; CHECK-NEXT: .long 19
+
+; CHECK-LABEL:{{^}}test_char:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12297
+; CHECK: .long 1
+; CHECK: .long 1
+; CHECK: .long 4
+; CHECK: .ascii "char"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_ushort2:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 7
+; CHECK: .ascii "ushort2"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_int3:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 16
+; CHECK: .long 16
+; CHECK: .long 4
+; CHECK: .ascii "int3"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_ulong4:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 32
+; CHECK: .long 32
+; CHECK: .long 6
+; CHECK: .ascii "ulong4"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_half8:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 16
+; CHECK: .long 16
+; CHECK: .long 5
+; CHECK: .ascii "half8"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_float16:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 64
+; CHECK: .long 64
+; CHECK: .long 7
+; CHECK: .ascii "float16"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_double16:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 128
+; CHECK: .long 128
+; CHECK: .long 8
+; CHECK: .ascii "double16"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_pointer:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 28672
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_image:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 28674
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 9
+; CHECK: .ascii "image2d_t"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_sampler:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12339
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 9
+; CHECK: .ascii "sampler_t"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_queue:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 28676
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 7
+; CHECK: .ascii "queue_t"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_struct:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12288
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 8
+; CHECK: .ascii "struct A"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_i128:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 1
+; CHECK: .long 12289
+; CHECK: .long 16
+; CHECK: .long 8
+; CHECK: .long 4
+; CHECK: .ascii "i128"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_multi_arg:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 3
+; CHECK: .long 12337
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 3
+; CHECK: .ascii "int"
+; CHECK: .long 12289
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 6
+; CHECK: .ascii "short2"
+; CHECK: .long 12289
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 5
+; CHECK: .ascii "char3"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_addr_space:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 3
+; CHECK: .long 28672
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 45056
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 61440
+; CHECK: .long 4
+; CHECK: .long 4
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_type_qual:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 3
+; CHECK: .long 28928
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 31232
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 29696
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 5
+; CHECK: .ascii "int *"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_access_qual:
+; CHECK: .section .OpenCL.Metadata
+; CHECK: .long 3
+; CHECK: .long 16386
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 9
+; CHECK: .ascii "image1d_t"
+; CHECK: .long 20482
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 9
+; CHECK: .ascii "image2d_t"
+; CHECK: .long 24578
+; CHECK: .long 8
+; CHECK: .long 8
+; CHECK: .long 9
+; CHECK: .ascii "image3d_t"
+; CHECK: .long 0
+
+define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
+; CHECK: .section .OpenCL.Metadata
+; CHECK-NEXT:.long 1
+; CHECK-NEXT:.long 12337
+; CHECK-NEXT:.long 4
+; CHECK-NEXT:.long 4
+; CHECK-NEXT:.long 3
+; CHECK-NEXT:.ascii "int"
+; CHECK-NEXT:.long 5
+; CHECK-NEXT:.long 1
+; CHECK-NEXT:.long 2
+; CHECK-NEXT:.long 4
+; CHECK-NEXT:.long 3
+; CHECK-NEXT:.ascii "int"
+
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
+; CHECK: .section .OpenCL.Metadata
+; CHECK-NEXT:.long 1
+; CHECK-NEXT:.long 12337
+; CHECK-NEXT:.long 4
+; CHECK-NEXT:.long 4
+; CHECK-NEXT:.long 3
+; CHECK-NEXT:.ascii "int"
+; CHECK-NEXT:.long 6
+; CHECK-NEXT:.long 8
+; CHECK-NEXT:.long 16
+; CHECK-NEXT:.long 32
+; CHECK-NEXT:.long 5
+; CHECK-NEXT:.ascii "uint4"
+
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
+  ret void
+}
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"int"}
+!4 = !{!""}
+!5 = !{i32 undef, i32 1}
+!6 = !{i32 1, i32 2, i32 4}
+!7 = !{<4 x i32> undef, i32 0}
+!8 = !{i32 8, i32 16, i32 32}
+!9 = !{!"char"}
+!10 = !{!"ushort2"}
+!11 = !{!"int3"}
+!12 = !{!"ulong4"}
+!13 = !{!"half8"}
+!14 = !{!"float16"}
+!15 = !{!"double16"}
+!16 = !{!"int *"}
+!17 = !{!"image2d_t"}
+!18 = !{!"sampler_t"}
+!19 = !{!"queue_t"}
+!20 = !{!"struct A"}
+!21 = !{!"i128"}
+!22 = !{i32 0, i32 0, i32 0}
+!23 = !{!"none", !"none", !"none"}
+!24 = !{!"int", !"short2", !"char3"}
+!25 = !{!"", !"", !""}
+!50 = !{i32 1, i32 2, i32 3}
+!51 = !{!"int *", !"int *", !"int *"}
+!60 = !{i32 1, i32 1, i32 1}
+!61 = !{!"read_only", !"write_only", !"read_write"}
+!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
+!70 = !{!"volatile", !"const restrict", !"pipe"}