Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -150,10 +150,6 @@ unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; - void emitStartOfRuntimeMetadata(const Module &M); - - void emitRuntimeMetadata(const Function &F); - protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -44,6 +44,47 @@ using namespace ::AMDGPU; using namespace llvm; +namespace AMDGPU { + + // Class for emitting runtime metadata as a note element in .note section. + // To emit runtime metadata, define an instance of this class. + class EmitRuntimeMD{ + public: + EmitRuntimeMD(MCStreamer &S, Module &M) : S(S) { + emitAsNoteElement(M); + } + private: + // Emit runtime metadata as a note element. + void emitAsNoteElement(Module &M); + + void emitRuntimeMetadata(Module &M); + void emitStartOfRuntimeMetadata(const Module &M); + + // Emit runtime metadata for a kernel function. + void emitRuntimeMetadata(const Function &F); + + // Emit runtime metadata for a kernel argument. + void emitRuntimeMetadataForKernelArg(const DataLayout &DL, + Type *T, RuntimeMD::KernelArg::Kind Kind, + StringRef BaseTypeName = "", StringRef TypeName = "", + StringRef ArgName = "", StringRef TypeQual = "", + StringRef AccQual = ""); + + // Emit a key and an integer value for runtime metadata. + void emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V, unsigned Size); + + // Emit a key and a string value for runtime metadata. + void emitRuntimeMDStringValue(RuntimeMD::Key K, StringRef S); + + // Emit a key and three integer values for runtime metadata. 
+ // The three integer values are obtained from MDNode \p Node; + void emitRuntimeMDThreeIntValues(RuntimeMD::Key K, MDNode *Node, + unsigned Size); + + MCStreamer &S; + }; +} + // TODO: This should get the default rounding mode from the kernel. We just set // the default here, but this could change if the OpenCL rounding mode pragmas // are used. @@ -117,7 +158,9 @@ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); - emitStartOfRuntimeMetadata(M); + + // Emit runtime metadata. + EmitRuntimeMD EmitMD(*OutStreamer, M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -277,8 +320,6 @@ } } - emitRuntimeMetadata(*MF.getFunction()); - return false; } @@ -791,53 +832,45 @@ return false; } -// Emit a key and an integer value for runtime metadata. -static void emitRuntimeMDIntValue(MCStreamer &Streamer, - RuntimeMD::Key K, uint64_t V, - unsigned Size) { - Streamer.EmitIntValue(K, 1); - Streamer.EmitIntValue(V, Size); +void EmitRuntimeMD::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V, + unsigned Size) { + S.EmitIntValue(K, 1); + S.EmitIntValue(V, Size); } -// Emit a key and a string value for runtime metadata. -static void emitRuntimeMDStringValue(MCStreamer &Streamer, - RuntimeMD::Key K, StringRef S) { - Streamer.EmitIntValue(K, 1); - Streamer.EmitIntValue(S.size(), 4); - Streamer.EmitBytes(S); +void EmitRuntimeMD::emitRuntimeMDStringValue(RuntimeMD::Key K, + StringRef R) { + S.EmitIntValue(K, 1); + S.EmitIntValue(R.size(), 4); + S.EmitBytes(R); } -// Emit a key and three integer values for runtime metadata. 
-// The three integer values are obtained from MDNode \p Node; -static void emitRuntimeMDThreeIntValues(MCStreamer &Streamer, - RuntimeMD::Key K, MDNode *Node, - unsigned Size) { +void EmitRuntimeMD::emitRuntimeMDThreeIntValues(RuntimeMD::Key K, + MDNode *Node, + unsigned Size) { assert(Node->getNumOperands() == 3); - Streamer.EmitIntValue(K, 1); + S.EmitIntValue(K, 1); for (const MDOperand &Op : Node->operands()) { const ConstantInt *CI = mdconst::extract(Op); - Streamer.EmitIntValue(CI->getZExtValue(), Size); + S.EmitIntValue(CI->getZExtValue(), Size); } } -void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { - OutStreamer->SwitchSection(getObjFileLowering().getContext() - .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyMDVersion, +void EmitRuntimeMD::emitStartOfRuntimeMetadata(const Module &M) { + emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion, RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { if (MD->getNumOperands() != 0) { auto Node = MD->getOperand(0); if (Node->getNumOperands() > 1) { - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguage, + emitRuntimeMDIntValue(RuntimeMD::KeyLanguage, RuntimeMD::OpenCL_C, 1); uint16_t Major = mdconst::extract(Node->getOperand(0)) ->getZExtValue(); uint16_t Minor = mdconst::extract(Node->getOperand(1)) ->getZExtValue(); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguageVersion, + emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion, Major * 100 + Minor * 10, 2); } } @@ -847,7 +880,7 @@ for (unsigned I = 0; I < MD->getNumOperands(); ++I) { auto Node = MD->getOperand(I); if (Node->getNumOperands() > 0) - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyPrintfInfo, + emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo, cast(Node->getOperand(0))->getString()); } } @@ -941,33 +974,32 @@ } } -static void emitRuntimeMetadataForKernelArg(const DataLayout &DL, - 
MCStreamer &OutStreamer, Type *T, - RuntimeMD::KernelArg::Kind Kind, - StringRef BaseTypeName = "", StringRef TypeName = "", - StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") { +void EmitRuntimeMD::emitRuntimeMetadataForKernelArg(const DataLayout &DL, + Type *T, RuntimeMD::KernelArg::Kind Kind, + StringRef BaseTypeName, StringRef TypeName, + StringRef ArgName, StringRef TypeQual, StringRef AccQual) { // Emit KeyArgBegin. - OutStreamer.EmitIntValue(RuntimeMD::KeyArgBegin, 1); + S.EmitIntValue(RuntimeMD::KeyArgBegin, 1); // Emit KeyArgSize and KeyArgAlign. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, + emitRuntimeMDIntValue(RuntimeMD::KeyArgSize, DL.getTypeAllocSize(T), 4); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, + emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign, DL.getABITypeAlignment(T), 4); if (auto PT = dyn_cast(T)) { auto ET = PT->getElementType(); if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgPointeeAlign, + emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign, DL.getABITypeAlignment(ET), 4); } // Emit KeyArgTypeName. if (!TypeName.empty()) - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); + emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName); // Emit KeyArgName. if (!ArgName.empty()) - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); + emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName); // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. SmallVector SplitQ; @@ -980,14 +1012,14 @@ .Case("const", RuntimeMD::KeyArgIsConst) .Case("pipe", RuntimeMD::KeyArgIsPipe) .Default(RuntimeMD::KeyNull); - OutStreamer.EmitIntValue(Key, 1); + S.EmitIntValue(Key, 1); } // Emit KeyArgKind. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgKind, Kind, 1); + emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1); // Emit KeyArgValueType. 
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, + emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType, getRuntimeMDValueType(T, BaseTypeName), 2); // Emit KeyArgAccQual. @@ -997,28 +1029,24 @@ .Case("write_only", RuntimeMD::KernelArg::WriteOnly) .Case("read_write", RuntimeMD::KernelArg::ReadWrite) .Default(RuntimeMD::KernelArg::None); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1); + emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1); } // Emit KeyArgAddrQual. if (auto *PT = dyn_cast(T)) - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, + emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual, getRuntimeAddrSpace(static_cast( PT->getAddressSpace())), 1); // Emit KeyArgEnd - OutStreamer.EmitIntValue(RuntimeMD::KeyArgEnd, 1); + S.EmitIntValue(RuntimeMD::KeyArgEnd, 1); } -void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { +void EmitRuntimeMD::emitRuntimeMetadata(const Function &F) { if (!F.getMetadata("kernel_arg_type")) return; - - MCContext &Context = getObjFileLowering().getContext(); - OutStreamer->SwitchSection( - Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyKernelName, F.getName()); + S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1); + emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName()); const DataLayout &DL = F.getParent()->getDataLayout(); for (auto &Arg : F.args()) { @@ -1053,35 +1081,35 @@ RuntimeMD::KernelArg::DynamicSharedPointer : RuntimeMD::KernelArg::GlobalBuffer) : RuntimeMD::KernelArg::ByValue); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, T, + emitRuntimeMetadataForKernelArg(DL, T, Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual); } // Emit hidden kernel arguments for OpenCL kernels. 
if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { auto Int64T = Type::getInt64Ty(F.getContext()); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + emitRuntimeMetadataForKernelArg(DL, Int64T, RuntimeMD::KernelArg::HiddenGlobalOffsetX); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + emitRuntimeMetadataForKernelArg(DL, Int64T, RuntimeMD::KernelArg::HiddenGlobalOffsetY); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + emitRuntimeMetadataForKernelArg(DL, Int64T, RuntimeMD::KernelArg::HiddenGlobalOffsetZ); if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), RuntimeMD::KernelArg::Global); - emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int8PtrT, + emitRuntimeMetadataForKernelArg(DL, Int8PtrT, RuntimeMD::KernelArg::HiddenPrintfBuffer); } } // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. if (auto RWGS = F.getMetadata("reqd_work_group_size")) { - emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyReqdWorkGroupSize, + emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize, RWGS, 4); } if (auto WGSH = F.getMetadata("work_group_size_hint")) { - emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyWorkGroupSizeHint, + emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint, WGSH, 4); } @@ -1089,9 +1117,45 @@ auto TypeName = getOCLTypeName(cast( VTH->getOperand(0))->getType(), mdconst::extract( VTH->getOperand(1))->getZExtValue()); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyVecTypeHint, TypeName); + emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName); } // Emit KeyKernelEnd - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1); + S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1); +} + +void EmitRuntimeMD::emitAsNoteElement(Module &M) { + auto &Context = S.getContext(); + + StringRef Name = "AMD"; // Vendor name of note element + const unsigned NameSZ = 4; // Size of Name padded to a 4 byte boundary + 
+ S.PushSection(); + S.SwitchSection(Context.getELFSection( + RuntimeMD::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + + // Create two labels to mark the beginning and end of the desc field + // and an MCExpr to calculate the size of the desc field. + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + // Emit the note element for runtime metadata. + S.EmitIntValue(NameSZ, 4); // namesz + S.EmitValue(DescSZ, 4); // descsz + S.EmitIntValue(RuntimeMD::NoteType, 4); // type + S.EmitBytes(Name); // name + S.EmitValueToAlignment(4); // padding 0 + S.EmitLabel(DescBegin); + emitRuntimeMetadata(M); // desc + S.EmitValueToAlignment(4); // padding 0 + S.EmitLabel(DescEnd); + S.PopSection(); +} + +void EmitRuntimeMD::emitRuntimeMetadata(Module &M) { + emitStartOfRuntimeMetadata(M); + for (auto &F : M.functions()) + emitRuntimeMetadata(F); } Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -13,11 +13,11 @@ /// /// Runtime requests certain information (metadata) about kernels to be able /// to execute the kernels and answer the queries about the kernels. -/// The metadata is represented as a byte stream in an ELF section of a -/// binary (code object). The byte stream consists of key-value pairs. -/// Each key is an 8 bit unsigned integer. Each value can be an integer, -/// a string, or a stream of key-value pairs. There are 3 levels of key-value -/// pair streams. At the beginning of the ELF section is the top level +/// The metadata is represented as a note element in the .note ELF section of a +/// binary (code object). The desc field of the note element consists of +/// key-value pairs. Each key is an 8 bit unsigned integer. 
Each value can be +/// an integer, a string, or a stream of key-value pairs. There are 3 levels of +/// key-value pair streams. At the beginning of the desc field is the top level /// key-value pair stream. A kernel-level key-value pair stream starts after /// encountering KeyKernelBegin and ends immediately before encountering /// KeyKernelEnd. A kernel-argument-level key-value pair stream starts @@ -48,7 +48,10 @@ const unsigned char MDRevision = 0; // ELF section name containing runtime metadata - const char SectionName[] = ".AMDGPU.runtime_metadata"; + const char SectionName[] = ".note"; + + // Note type + const uint32_t NoteType = 7; // Enumeration values of keys in runtime metadata. enum Key { Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -67,6 +67,7 @@ NT_AMDGPU_HSA_PRODUCER = 4, NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, NT_AMDGPU_HSA_EXTENSION = 6, + NT_AMDGPU_HSA_RUNTIME_METADATA = 7, NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 }; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetStreamer.h" +#include "AMDGPURuntimeMetadata.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" @@ -26,7 +27,7 @@ using namespace llvm; AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) - : MCTargetStreamer(S) { } + : MCTargetStreamer(S) {} //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer @@ -87,7 +88,16 @@ 
//===----------------------------------------------------------------------===// AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S) - : AMDGPUTargetStreamer(S), Streamer(S) { } + : AMDGPUTargetStreamer(S), Streamer(S) { + // The note type for runtime metadata is defined in AMDGPUTargetStreamer.h + // to make sure it is unique among other note types. It is also defined + // in AMDGPURuntimeMetadata.h for runtime to use. + // This static_assert makes sure they match. + static_assert(::AMDGPU::RuntimeMD::NoteType == + AMDGPUTargetELFStreamer::NT_AMDGPU_HSA_RUNTIME_METADATA, + "Mismatch between note type for runtime metadata defined in " + "AMDGPURuntimeMetadata.h and AMDGPUTargetStreamer.h"); +} MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { return static_cast(Streamer); Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll @@ -1,8 +1,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long {{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "AMD" !opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll @@ -1,9 +1,11 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 
{{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "AMD" !opencl.ocl.version = !{!0} !0 = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll @@ -1,9 +1,11 @@ ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: .section .AMDGPU.runtime_metadata -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .short 256 +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long {{.+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "AMD" !opencl.ocl.version = !{!0} !0 = !{i32 1} Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ test/CodeGen/AMDGPU/runtime-metadata.ll @@ -9,7 +9,13 @@ %struct.B = type { i32 addrspace(1)*} %opencl.clk_event_t = type opaque -; CHECK: .section .AMDGPU.runtime_metadata +; CHECK: .section .note,#alloc +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long [[Ltmp1:.+]]-[[Ltmp0:.+]] +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "AMD" +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: [[Ltmp0]]: ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .short 256 ; CHECK-NEXT: .byte 2 @@ -23,8 +29,6 @@ ; CHECK-NEXT: .long 10 ; CHECK-NEXT: .ascii "2:1:8:%g\\n" -; CHECK-LABEL:{{^}}test_char: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -92,8 +96,6 @@ ret void } -; CHECK-LABEL:{{^}}test_ushort2: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -161,8 +163,6 @@ ret void } -; CHECK-LABEL:{{^}}test_int3: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -230,8 +230,6 @@ ret void } -; CHECK-LABEL:{{^}}test_ulong4: -; CHECK: 
.section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 11 @@ -299,8 +297,6 @@ ret void } -; CHECK-LABEL:{{^}}test_half8: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -368,8 +364,6 @@ ret void } -; CHECK-LABEL:{{^}}test_float16: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -437,8 +431,6 @@ ret void } -; CHECK-LABEL:{{^}}test_double16: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 13 @@ -506,8 +498,6 @@ ret void } -; CHECK-LABEL:{{^}}test_pointer: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -577,8 +567,6 @@ ret void } -; CHECK-LABEL:{{^}}test_image: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -648,8 +636,6 @@ ret void } -; CHECK-LABEL:{{^}}test_sampler: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 12 @@ -717,8 +703,6 @@ ret void } -; CHECK-LABEL:{{^}}test_queue: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 10 @@ -788,8 +772,6 @@ ret void } -; CHECK-LABEL:{{^}}test_struct: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 11 @@ -859,8 +841,6 @@ ret void } -; CHECK-LABEL:{{^}}test_i128: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 9 @@ -928,8 +908,6 @@ ret void } -; CHECK-LABEL:{{^}}test_multi_arg: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 14 @@ -1027,8 +1005,6 @@ ret void } -; CHECK-LABEL:{{^}}test_addr_space: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; 
CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 15 @@ -1134,8 +1110,6 @@ ret void } -; CHECK-LABEL:{{^}}test_type_qual: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 14 @@ -1243,8 +1217,6 @@ ret void } -; CHECK-LABEL:{{^}}test_access_qual: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 16 @@ -1348,8 +1320,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_half: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1420,8 +1390,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_float: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 24 @@ -1492,8 +1460,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_double: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 25 @@ -1564,8 +1530,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_char: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1636,8 +1600,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_short: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 24 @@ -1708,8 +1670,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_long: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 23 @@ -1780,8 +1740,6 @@ ret void } -; CHECK-LABEL:{{^}}test_vec_type_hint_unknown: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 26 @@ -1852,8 +1810,6 @@ ret void } -; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 27 @@ -1928,8 +1884,6 @@ ret void } -; 
CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 27 @@ -2004,8 +1958,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 19 @@ -2075,8 +2027,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 28 @@ -2146,8 +2096,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_vector_of_ptr: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 22 @@ -2215,8 +2163,6 @@ ret void } -; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 29 @@ -2287,8 +2233,6 @@ ret void } -; CHECK-LABEL:{{^}}test_pointee_align: -; CHECK: .section .AMDGPU.runtime_metadata ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 6 ; CHECK-NEXT: .long 18 @@ -2472,6 +2416,9 @@ ret void } +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: [[Ltmp1]]: + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0}