Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -150,10 +150,6 @@ unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; - void emitStartOfRuntimeMetadata(const Module &M); - - void emitRuntimeMetadata(const Function &F); - protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -39,9 +39,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "AMDGPURuntimeMetadata.h" -using namespace ::AMDGPU; using namespace llvm; // TODO: This should get the default rounding mode from the kernel. We just set @@ -119,7 +117,9 @@ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); - emitStartOfRuntimeMetadata(M); + + // Emit runtime metadata. + TS->emitRuntimeMetadataAsNoteElement(M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -279,8 +279,6 @@ } } - emitRuntimeMetadata(*MF.getFunction()); - return false; } @@ -793,307 +791,3 @@ return false; } -// Emit a key and an integer value for runtime metadata. -static void emitRuntimeMDIntValue(MCStreamer &Streamer, - RuntimeMD::Key K, uint64_t V, - unsigned Size) { - Streamer.EmitIntValue(K, 1); - Streamer.EmitIntValue(V, Size); -} - -// Emit a key and a string value for runtime metadata. -static void emitRuntimeMDStringValue(MCStreamer &Streamer, - RuntimeMD::Key K, StringRef S) { - Streamer.EmitIntValue(K, 1); - Streamer.EmitIntValue(S.size(), 4); - Streamer.EmitBytes(S); -} - -// Emit a key and three integer values for runtime metadata. 
-// The three integer values are obtained from MDNode \p Node; -static void emitRuntimeMDThreeIntValues(MCStreamer &Streamer, - RuntimeMD::Key K, MDNode *Node, - unsigned Size) { - assert(Node->getNumOperands() == 3); - - Streamer.EmitIntValue(K, 1); - for (const MDOperand &Op : Node->operands()) { - const ConstantInt *CI = mdconst::extract(Op); - Streamer.EmitIntValue(CI->getZExtValue(), Size); - } -} - -void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { - OutStreamer->SwitchSection(getObjFileLowering().getContext() - .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyMDVersion, - RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); - if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands() != 0) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguage, - RuntimeMD::OpenCL_C, 1); - uint16_t Major = mdconst::extract(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = mdconst::extract(Node->getOperand(1)) - ->getZExtValue(); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguageVersion, - Major * 100 + Minor * 10, 2); - } - } - } - - if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { - for (unsigned I = 0; I < MD->getNumOperands(); ++I) { - auto Node = MD->getOperand(I); - if (Node->getNumOperands() > 0) - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyPrintfInfo, - cast(Node->getOperand(0))->getString()); - } - } -} - -static std::string getOCLTypeName(Type *Ty, bool Signed) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getOCLTypeName(Ty, true)).str(); - unsigned BW = Ty->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - 
return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - case Type::VectorTyID: { - VectorType *VecTy = cast(Ty); - Type *EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); - } - default: - return "unknown"; - } -} - -static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return RuntimeMD::KernelArg::F16; - case Type::FloatTyID: - return RuntimeMD::KernelArg::F32; - case Type::DoubleTyID: - return RuntimeMD::KernelArg::F64; - case Type::IntegerTyID: { - bool Signed = !TypeName.startswith("u"); - switch (Ty->getIntegerBitWidth()) { - case 8: - return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; - case 16: - return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; - case 32: - return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; - case 64: - return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as struct type. 
- return RuntimeMD::KernelArg::Struct; - } - } - case Type::VectorTyID: - return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); - case Type::PointerTyID: - return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); - default: - return RuntimeMD::KernelArg::Struct; - } -} - -static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: - return RuntimeMD::KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return RuntimeMD::KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return RuntimeMD::KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: - return RuntimeMD::KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: - return RuntimeMD::KernelArg::Region; - default: - return RuntimeMD::KernelArg::Private; - } -} - -static void emitRuntimeMetadataForKernelArg(const DataLayout &DL, - MCStreamer &OutStreamer, Type *T, - RuntimeMD::KernelArg::Kind Kind, - StringRef BaseTypeName = "", StringRef TypeName = "", - StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") { - // Emit KeyArgBegin. - OutStreamer.EmitIntValue(RuntimeMD::KeyArgBegin, 1); - - // Emit KeyArgSize and KeyArgAlign. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, - DL.getTypeAllocSize(T), 4); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, - DL.getABITypeAlignment(T), 4); - if (auto PT = dyn_cast(T)) { - auto ET = PT->getElementType(); - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgPointeeAlign, - DL.getABITypeAlignment(ET), 4); - } - - // Emit KeyArgTypeName. - if (!TypeName.empty()) - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); - - // Emit KeyArgName. - if (!ArgName.empty()) - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); - - // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. 
- SmallVector SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto Key = StringSwitch(KeyName) - .Case("volatile", RuntimeMD::KeyArgIsVolatile) - .Case("restrict", RuntimeMD::KeyArgIsRestrict) - .Case("const", RuntimeMD::KeyArgIsConst) - .Case("pipe", RuntimeMD::KeyArgIsPipe) - .Default(RuntimeMD::KeyNull); - OutStreamer.EmitIntValue(Key, 1); - } - - // Emit KeyArgKind. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgKind, Kind, 1); - - // Emit KeyArgValueType. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, - getRuntimeMDValueType(T, BaseTypeName), 2); - - // Emit KeyArgAccQual. - if (!AccQual.empty()) { - auto AQ = StringSwitch(AccQual) - .Case("read_only", RuntimeMD::KernelArg::ReadOnly) - .Case("write_only", RuntimeMD::KernelArg::WriteOnly) - .Case("read_write", RuntimeMD::KernelArg::ReadWrite) - .Default(RuntimeMD::KernelArg::None); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1); - } - - // Emit KeyArgAddrQual. 
- if (auto *PT = dyn_cast(T)) - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, - getRuntimeAddrSpace(static_cast( - PT->getAddressSpace())), 1); - - // Emit KeyArgEnd - OutStreamer.EmitIntValue(RuntimeMD::KeyArgEnd, 1); -} - -void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { - if (!F.getMetadata("kernel_arg_type")) - return; - - MCContext &Context = getObjFileLowering().getContext(); - OutStreamer->SwitchSection( - Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyKernelName, F.getName()); - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &Arg : F.args()) { - unsigned I = Arg.getArgNo(); - Type *T = Arg.getType(); - auto TypeName = dyn_cast(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - auto BaseTypeName = cast(F.getMetadata( - "kernel_arg_base_type")->getOperand(I))->getString(); - StringRef ArgName; - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) - ArgName = cast(ArgNameMD->getOperand(I))->getString(); - auto TypeQual = cast(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - auto AccQual = cast(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - RuntimeMD::KernelArg::Kind Kind; - if (TypeQual.find("pipe") != StringRef::npos) - Kind = RuntimeMD::KernelArg::Pipe; - else Kind = StringSwitch(BaseTypeName) - .Case("sampler_t", RuntimeMD::KernelArg::Sampler) - .Case("queue_t", RuntimeMD::KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - RuntimeMD::KernelArg::Image) - .Default(isa(T) ? 
- (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? - RuntimeMD::KernelArg::DynamicSharedPointer : - RuntimeMD::KernelArg::GlobalBuffer) : - RuntimeMD::KernelArg::ByValue); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, T, - Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual); - } - - // Emit hidden kernel arguments for OpenCL kernels. - if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto Int64T = Type::getInt64Ty(F.getContext()); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetX); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetY); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetZ); - if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { - auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), - RuntimeMD::KernelArg::Global); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int8PtrT, - RuntimeMD::KernelArg::HiddenPrintfBuffer); - } - } - - // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. 
- if (auto RWGS = F.getMetadata("reqd_work_group_size")) { - emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyReqdWorkGroupSize, - RWGS, 4); - } - - if (auto WGSH = F.getMetadata("work_group_size_hint")) { - emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyWorkGroupSizeHint, - WGSH, 4); - } - - if (auto VTH = F.getMetadata("vec_type_hint")) { - auto TypeName = getOCLTypeName(cast( - VTH->getOperand(0))->getType(), mdconst::extract( - VTH->getOperand(1))->getZExtValue()); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyVecTypeHint, TypeName); - } - - // Emit KeyKernelEnd - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1); -} Index: lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- /dev/null +++ lib/Target/AMDGPU/AMDGPUPTNote.h @@ -0,0 +1,42 @@ +//===-- AMDGPUNoteType.h - AMDGPU ELF PT_NOTE section info-------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// Enums and constants for AMDGPU PT_NOTE sections. 
+/// +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H + +namespace AMDGPU { + +namespace PT_NOTE { + +const char SectionName[] = ".note"; + +const char NoteName[] = "AMD"; + +enum NoteType{ + NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, + NT_AMDGPU_HSA_HSAIL = 2, + NT_AMDGPU_HSA_ISA = 3, + NT_AMDGPU_HSA_PRODUCER = 4, + NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, + NT_AMDGPU_HSA_EXTENSION = 6, + NT_AMDGPU_HSA_RUNTIME_METADATA = 7, + NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, + NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 + }; +} +} + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -13,11 +13,11 @@ /// /// Runtime requests certain information (metadata) about kernels to be able /// to execute the kernels and answer the queries about the kernels. -/// The metadata is represented as a byte stream in an ELF section of a -/// binary (code object). The byte stream consists of key-value pairs. -/// Each key is an 8 bit unsigned integer. Each value can be an integer, -/// a string, or a stream of key-value pairs. There are 3 levels of key-value -/// pair streams. At the beginning of the ELF section is the top level +/// The metadata is represented as a note element in the .note ELF section of a +/// binary (code object). The desc field of the note element consists of +/// key-value pairs. Each key is an 8 bit unsigned integer. Each value can be +/// an integer, a string, or a stream of key-value pairs. There are 3 levels of +/// key-value pair streams. At the beginning of the desc field is the top level /// key-value pair stream. 
A kernel-level key-value pair stream starts after /// encountering KeyKernelBegin and ends immediately before encountering /// KeyKernelEnd. A kernel-argument-level key-value pair stream starts @@ -47,9 +47,6 @@ const unsigned char MDVersion = 1; const unsigned char MDRevision = 0; - // ELF section name containing runtime metadata - const char SectionName[] = ".AMDGPU.runtime_metadata"; - // Enumeration values of keys in runtime metadata. enum Key { KeyNull = 0, // Place holder. Ignored when encountered Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -14,9 +14,15 @@ #include "llvm/MC/MCStreamer.h" namespace llvm { +#include "AMDGPURuntimeMetadata.h" +class DataLayout; +class Function; class MCELFStreamer; class MCSymbol; +class MDNode; +class Module; +class Type; class AMDGPUTargetStreamer : public MCTargetStreamer { public: @@ -36,6 +42,36 @@ virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; + + /// Emit runtime metadata as a note element. + void emitRuntimeMetadataAsNoteElement(Module &M); + +private: + void emitRuntimeMetadata(Module &M); + void emitStartOfRuntimeMetadata(const Module &M); + + /// Emit runtime metadata for a kernel function. + void emitRuntimeMetadata(const Function &F); + + // Emit runtime metadata for a kernel argument. + void emitRuntimeMetadataForKernelArg(const DataLayout &DL, + Type *T, AMDGPU::RuntimeMD::KernelArg::Kind Kind, + StringRef BaseTypeName = "", StringRef TypeName = "", + StringRef ArgName = "", StringRef TypeQual = "", + StringRef AccQual = ""); + + /// Emit a key and an integer value for runtime metadata. 
+ void emitRuntimeMDIntValue(AMDGPU::RuntimeMD::Key K, + uint64_t V, unsigned Size); + + /// Emit a key and a string value for runtime metadata. + void emitRuntimeMDStringValue(AMDGPU::RuntimeMD::Key K, + StringRef S); + + /// Emit a key and three integer values for runtime metadata. + /// The three integer values are obtained from MDNode \p Node; + void emitRuntimeMDThreeIntValues(AMDGPU::RuntimeMD::Key K, MDNode *Node, + unsigned Size); }; class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { @@ -59,19 +95,6 @@ }; class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { - - enum NoteType { - NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, - NT_AMDGPU_HSA_HSAIL = 2, - NT_AMDGPU_HSA_ISA = 3, - NT_AMDGPU_HSA_PRODUCER = 4, - NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, - NT_AMDGPU_HSA_EXTENSION = 6, - NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, - NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 - }; - - static const char *NoteName; MCStreamer &Streamer; public: Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,11 +11,16 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -23,10 +28,15 @@ #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" +namespace llvm { +#include "AMDGPUPTNote.h" +} + using namespace llvm; +using namespace llvm::AMDGPU; AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) - : MCTargetStreamer(S) { } + : 
MCTargetStreamer(S) {} //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer @@ -86,10 +96,8 @@ // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// -const char *AMDGPUTargetELFStreamer::NoteName = "AMD"; - AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S) - : AMDGPUTargetStreamer(S), Streamer(S) { } + : AMDGPUTargetStreamer(S), Streamer(S) {} MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { return static_cast(Streamer); @@ -100,16 +108,18 @@ uint32_t Minor) { MCStreamer &OS = getStreamer(); MCSectionELF *Note = - OS.getContext().getELFSection(".note", ELF::SHT_NOTE, ELF::SHF_ALLOC); + OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE, + ELF::SHF_ALLOC); + auto NameSZ = sizeof(PT_NOTE::NoteName); OS.PushSection(); OS.SwitchSection(Note); - OS.EmitIntValue(strlen(NoteName) + 1, 4); // namesz - OS.EmitIntValue(8, 4); // descz - OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type - OS.EmitBytes(StringRef(NoteName, strlen(NoteName) + 1)); // name + OS.EmitIntValue(NameSZ, 4); // namesz + OS.EmitIntValue(8, 4); // descsz + OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type + OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name OS.EmitValueToAlignment(4); - OS.EmitIntValue(Major, 4); // desc + OS.EmitIntValue(Major, 4); // desc OS.EmitIntValue(Minor, 4); OS.EmitValueToAlignment(4); OS.PopSection(); @@ -123,7 +133,8 @@ StringRef ArchName) { MCStreamer &OS = getStreamer(); MCSectionELF *Note = - OS.getContext().getELFSection(".note", ELF::SHT_NOTE, ELF::SHF_ALLOC); + OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE, + ELF::SHF_ALLOC); uint16_t VendorNameSize = VendorName.size() + 1; uint16_t ArchNameSize = ArchName.size() + 1; @@ -133,10 +144,11 @@ OS.PushSection(); OS.SwitchSection(Note); - OS.EmitIntValue(strlen(NoteName) + 1, 4); // namesz + auto NameSZ = 
sizeof(PT_NOTE::NoteName); + OS.EmitIntValue(NameSZ, 4); // namesz OS.EmitIntValue(DescSZ, 4); // descsz - OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type - OS.EmitBytes(StringRef(NoteName, strlen(NoteName) + 1)); // name + OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_ISA, 4); // type + OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name OS.EmitValueToAlignment(4); OS.EmitIntValue(VendorNameSize, 2); // desc OS.EmitIntValue(ArchNameSize, 2); @@ -184,3 +196,341 @@ Symbol->setType(ELF::STT_OBJECT); Symbol->setBinding(ELF::STB_GLOBAL); } + +void AMDGPUTargetStreamer::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V, + unsigned Size) { + auto &S = getStreamer(); + S.EmitIntValue(K, 1); + S.EmitIntValue(V, Size); +} + +void AMDGPUTargetStreamer::emitRuntimeMDStringValue(RuntimeMD::Key K, + StringRef R) { + auto &S = getStreamer(); + S.EmitIntValue(K, 1); + S.EmitIntValue(R.size(), 4); + S.EmitBytes(R); +} + +void AMDGPUTargetStreamer::emitRuntimeMDThreeIntValues(RuntimeMD::Key K, + MDNode *Node, + unsigned Size) { + assert(Node->getNumOperands() == 3); + + auto &S = getStreamer(); + S.EmitIntValue(K, 1); + for (const MDOperand &Op : Node->operands()) { + const ConstantInt *CI = mdconst::extract(Op); + S.EmitIntValue(CI->getZExtValue(), Size); + } +} + +void AMDGPUTargetStreamer::emitStartOfRuntimeMetadata(const Module &M) { + emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion, + RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); + if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { + if (MD->getNumOperands() != 0) { + auto Node = MD->getOperand(0); + if (Node->getNumOperands() > 1) { + emitRuntimeMDIntValue(RuntimeMD::KeyLanguage, + RuntimeMD::OpenCL_C, 1); + uint16_t Major = mdconst::extract(Node->getOperand(0)) + ->getZExtValue(); + uint16_t Minor = mdconst::extract(Node->getOperand(1)) + ->getZExtValue(); + emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion, + Major * 100 + Minor * 10, 2); + } + } + } + + if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) 
{ + for (unsigned I = 0; I < MD->getNumOperands(); ++I) { + auto Node = MD->getOperand(I); + if (Node->getNumOperands() > 0) + emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo, + cast(Node->getOperand(0))->getString()); + } + } +} + +static std::string getOCLTypeName(Type *Ty, bool Signed) { + switch (Ty->getTypeID()) { + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getOCLTypeName(Ty, true)).str(); + unsigned BW = Ty->getIntegerBitWidth(); + switch (BW) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BW)).str(); + } + } + case Type::VectorTyID: { + VectorType *VecTy = cast(Ty); + Type *EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getVectorNumElements(); + return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); + } + default: + return "unknown"; + } +} + +static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( + Type *Ty, StringRef TypeName) { + switch (Ty->getTypeID()) { + case Type::HalfTyID: + return RuntimeMD::KernelArg::F16; + case Type::FloatTyID: + return RuntimeMD::KernelArg::F32; + case Type::DoubleTyID: + return RuntimeMD::KernelArg::F64; + case Type::IntegerTyID: { + bool Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; + case 16: + return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; + case 32: + return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; + case 64: + return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; + default: + // Runtime does not recognize other integer types. Report as struct type. 
+ return RuntimeMD::KernelArg::Struct; + } + } + case Type::VectorTyID: + return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); + case Type::PointerTyID: + return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); + default: + return RuntimeMD::KernelArg::Struct; + } +} + +static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( + AMDGPUAS::AddressSpaces A) { + switch (A) { + case AMDGPUAS::GLOBAL_ADDRESS: + return RuntimeMD::KernelArg::Global; + case AMDGPUAS::CONSTANT_ADDRESS: + return RuntimeMD::KernelArg::Constant; + case AMDGPUAS::LOCAL_ADDRESS: + return RuntimeMD::KernelArg::Local; + case AMDGPUAS::FLAT_ADDRESS: + return RuntimeMD::KernelArg::Generic; + case AMDGPUAS::REGION_ADDRESS: + return RuntimeMD::KernelArg::Region; + default: + return RuntimeMD::KernelArg::Private; + } +} + +void AMDGPUTargetStreamer::emitRuntimeMetadataForKernelArg(const DataLayout &DL, + Type *T, RuntimeMD::KernelArg::Kind Kind, + StringRef BaseTypeName, StringRef TypeName, + StringRef ArgName, StringRef TypeQual, StringRef AccQual) { + auto &S = getStreamer(); + + // Emit KeyArgBegin. + S.EmitIntValue(RuntimeMD::KeyArgBegin, 1); + + // Emit KeyArgSize and KeyArgAlign. + emitRuntimeMDIntValue(RuntimeMD::KeyArgSize, + DL.getTypeAllocSize(T), 4); + emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign, + DL.getABITypeAlignment(T), 4); + if (auto PT = dyn_cast(T)) { + auto ET = PT->getElementType(); + if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) + emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign, + DL.getABITypeAlignment(ET), 4); + } + + // Emit KeyArgTypeName. + if (!TypeName.empty()) + emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName); + + // Emit KeyArgName. + if (!ArgName.empty()) + emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName); + + // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. 
+ SmallVector SplitQ; + TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); + + for (StringRef KeyName : SplitQ) { + auto Key = StringSwitch(KeyName) + .Case("volatile", RuntimeMD::KeyArgIsVolatile) + .Case("restrict", RuntimeMD::KeyArgIsRestrict) + .Case("const", RuntimeMD::KeyArgIsConst) + .Case("pipe", RuntimeMD::KeyArgIsPipe) + .Default(RuntimeMD::KeyNull); + S.EmitIntValue(Key, 1); + } + + // Emit KeyArgKind. + emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1); + + // Emit KeyArgValueType. + emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType, + getRuntimeMDValueType(T, BaseTypeName), 2); + + // Emit KeyArgAccQual. + if (!AccQual.empty()) { + auto AQ = StringSwitch(AccQual) + .Case("read_only", RuntimeMD::KernelArg::ReadOnly) + .Case("write_only", RuntimeMD::KernelArg::WriteOnly) + .Case("read_write", RuntimeMD::KernelArg::ReadWrite) + .Default(RuntimeMD::KernelArg::None); + emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1); + } + + // Emit KeyArgAddrQual. + if (auto *PT = dyn_cast(T)) + emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual, + getRuntimeAddrSpace(static_cast( + PT->getAddressSpace())), 1); + + // Emit KeyArgEnd + S.EmitIntValue(RuntimeMD::KeyArgEnd, 1); +} + +void AMDGPUTargetStreamer::emitRuntimeMetadata(const Function &F) { + if (!F.getMetadata("kernel_arg_type")) + return; + auto &S = getStreamer(); + S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1); + emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName()); + + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &Arg : F.args()) { + unsigned I = Arg.getArgNo(); + Type *T = Arg.getType(); + auto TypeName = cast(F.getMetadata( + "kernel_arg_type")->getOperand(I))->getString(); + auto BaseTypeName = cast(F.getMetadata( + "kernel_arg_base_type")->getOperand(I))->getString(); + StringRef ArgName; + if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) + ArgName = cast(ArgNameMD->getOperand(I))->getString(); + auto TypeQual = cast(F.getMetadata( + 
"kernel_arg_type_qual")->getOperand(I))->getString(); + auto AccQual = cast(F.getMetadata( + "kernel_arg_access_qual")->getOperand(I))->getString(); + RuntimeMD::KernelArg::Kind Kind; + if (TypeQual.find("pipe") != StringRef::npos) + Kind = RuntimeMD::KernelArg::Pipe; + else Kind = StringSwitch(BaseTypeName) + .Case("sampler_t", RuntimeMD::KernelArg::Sampler) + .Case("queue_t", RuntimeMD::KernelArg::Queue) + .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", + "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_depth_t", "image2d_array_depth_t", + "image2d_msaa_t", "image2d_array_msaa_t", + "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_array_msaa_depth_t", "image3d_t", + RuntimeMD::KernelArg::Image) + .Default(isa(T) ? + (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? + RuntimeMD::KernelArg::DynamicSharedPointer : + RuntimeMD::KernelArg::GlobalBuffer) : + RuntimeMD::KernelArg::ByValue); + emitRuntimeMetadataForKernelArg(DL, T, + Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual); + } + + // Emit hidden kernel arguments for OpenCL kernels. + if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto Int64T = Type::getInt64Ty(F.getContext()); + emitRuntimeMetadataForKernelArg(DL, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetX); + emitRuntimeMetadataForKernelArg(DL, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetY); + emitRuntimeMetadataForKernelArg(DL, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetZ); + if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { + auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), + RuntimeMD::KernelArg::Global); + emitRuntimeMetadataForKernelArg(DL, Int8PtrT, + RuntimeMD::KernelArg::HiddenPrintfBuffer); + } + } + + // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. 
+ if (auto RWGS = F.getMetadata("reqd_work_group_size")) { + emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize, + RWGS, 4); + } + + if (auto WGSH = F.getMetadata("work_group_size_hint")) { + emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint, + WGSH, 4); + } + + if (auto VTH = F.getMetadata("vec_type_hint")) { + auto TypeName = getOCLTypeName(cast( + VTH->getOperand(0))->getType(), mdconst::extract( + VTH->getOperand(1))->getZExtValue()); + emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName); + } + + // Emit KeyKernelEnd + S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1); +} + +void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) { + auto &S = getStreamer(); + auto &Context = S.getContext(); + + auto NameSZ = sizeof(PT_NOTE::NoteName); // Size of note name including trailing null. + + S.PushSection(); + S.SwitchSection(Context.getELFSection( + PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + + // Create two labels to mark the beginning and end of the desc field + // and a MCExpr to calculate the size of the desc field. + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + // Emit the note element for runtime metadata. + // Name and desc should be padded to 4 byte boundary but size of name and + // desc should not include padding 0's. 
+ S.EmitIntValue(NameSZ, 4); // namesz + S.EmitValue(DescSZ, 4); // descsz + S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type + S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name + S.EmitValueToAlignment(4); // padding 0 + S.EmitLabel(DescBegin); + emitRuntimeMetadata(M); // desc + S.EmitLabel(DescEnd); + S.EmitValueToAlignment(4); // padding 0 + S.PopSection(); +} + +void AMDGPUTargetStreamer::emitRuntimeMetadata(Module &M) { + emitStartOfRuntimeMetadata(M); + for (auto &F : M.functions()) + emitRuntimeMetadata(F); +} + Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll @@ -1,8 +1,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long {{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .asciz "AMD" !opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll @@ -1,9 +1,11 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long {{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .asciz "AMD" !opencl.ocl.version = !{!0} !0 = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll =================================================================== --- 
test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll @@ -1,9 +1,11 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long {{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .asciz "AMD" !opencl.ocl.version = !{!0} !0 = !{i32 1} Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ test/CodeGen/AMDGPU/runtime-metadata.ll @@ -9,7 +9,13 @@ %struct.B = type { i32 addrspace(1)*} %opencl.clk_event_t = type opaque -; CHECK: .section .AMDGPU.runtime_metadata +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long [[Ltmp1:.+]]-[[Ltmp0:.+]] +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .asciz "AMD" +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: [[Ltmp0]]: ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .short 256 ; CHECK-NEXT: .byte 2 @@ -23,8 +29,6 @@ ; CHECK-NEXT: .long 10 ; CHECK-NEXT: .ascii "2:1:8:%g\\n" -; CHECK-LABEL:{{^}}test_char: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -92,8 +96,6 @@ ret void } -; CHECK-LABEL:{{^}}test_ushort2: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -161,8 +163,6 @@ ret void } -; CHECK-LABEL:{{^}}test_int3: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -230,8 +230,6 @@ ret void } -; CHECK-LABEL:{{^}}test_ulong4: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 11 @@ -299,8 +297,6 @@ ret void } -; CHECK-LABEL:{{^}}test_half8: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; 
CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -368,8 +364,6 @@ ret void } -; CHECK-LABEL:{{^}}test_float16: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -437,8 +431,6 @@ ret void } -; CHECK-LABEL:{{^}}test_double16: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 13 @@ -506,8 +498,6 @@ ret void } -; CHECK-LABEL:{{^}}test_pointer: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -577,8 +567,6 @@ ret void } -; CHECK-LABEL:{{^}}test_image: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -648,8 +636,6 @@ ret void } -; CHECK-LABEL:{{^}}test_sampler: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -717,8 +703,6 @@ ret void } -; CHECK-LABEL:{{^}}test_queue: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -788,8 +772,6 @@ ret void } -; CHECK-LABEL:{{^}}test_struct: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 11 @@ -859,8 +841,6 @@ ret void } -; CHECK-LABEL:{{^}}test_i128: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -928,8 +908,6 @@ ret void } -; CHECK-LABEL:{{^}}test_multi_arg: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 14 @@ -1027,8 +1005,6 @@ ret void } -; CHECK-LABEL:{{^}}test_addr_space: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 15 @@ -1134,8 +1110,6 @@ ret void } -; CHECK-LABEL:{{^}}test_type_qual: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 14 @@ -1243,8 
+1217,6 @@ ret void } -; CHECK-LABEL:{{^}}test_access_qual: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 16 @@ -1348,8 +1320,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_half: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1420,8 +1390,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_float: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 24 @@ -1492,8 +1460,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_double: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 25 @@ -1564,8 +1530,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_char: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1636,8 +1600,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_short: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 24 @@ -1708,8 +1670,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_long: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1780,8 +1740,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_unknown: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 26 @@ -1852,8 +1810,6 @@ ret void } -; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 27 @@ -1928,8 +1884,6 @@ ret void } -; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 27 @@ -2004,8 +1958,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr: -; CHECK: .section 
.AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 19 @@ -2075,8 +2027,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 28 @@ -2146,8 +2096,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_vector_of_ptr: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 22 @@ -2215,8 +2163,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 29 @@ -2287,8 +2233,6 @@ ret void } -; CHECK-LABEL:{{^}}test_pointee_align: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 18 @@ -2471,6 +2415,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { ret void } +; CHECK-NEXT: [[Ltmp1]]: +; CHECK-NEXT: .p2align 2 !llvm.printf.fmts = !{!100, !101}