diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -3581,6 +3581,27 @@
                                                             :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ============================= ============= ========== =======================================
 
+..
+
+  .. table:: AMDHSA Code Object V5 Kernel Attribute Metadata Map
+     :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-map-v5-table
+
+     =========================== ============== ========= ==============================
+     String Key                  Value Type     Required? Description
+     =========================== ============== ========= ==============================
+     ".uniform_work_group_size"  integer                  Indicates if the kernel
+                                                          requires that each dimension
+                                                          of the global size is a
+                                                          multiple of the corresponding
+                                                          dimension of the work-group
+                                                          size. Value of 1 implies true
+                                                          and value of 0 implies false.
+                                                          Metadata is only emitted when
+                                                          value is 1.
+     =========================== ============== ========= ==============================
+
+..
+
 ..
 
   .. 
table:: AMDHSA Code Object V5 Kernel Argument Metadata Map Additions and Changes
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -278,6 +278,9 @@
     return false;
   if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
     return false;
+  if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
+    return false;
+
   return true;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -58,6 +58,10 @@
   virtual void emitVersion() = 0;
   virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
                                     msgpack::ArrayDocNode Args) = 0;
+  /// Emits the kernel attribute metadata of \p Func into the kernel map
+  /// \p Kern. Each streamer version supplies its own implementation.
+  virtual void emitKernelAttrs(const Function &Func,
+                               msgpack::MapDocNode Kern) = 0;
 };
 
 class MetadataStreamerMsgPackV3 : public MetadataStreamer {
@@ -90,7 +94,7 @@
 
   void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
 
-  void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
+  void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override;
 
   void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
 
@@ -149,6 +153,7 @@
   void emitVersion() override;
   void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
                             msgpack::ArrayDocNode Args) override;
+  void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) override;
 
 public:
   MetadataStreamerMsgPackV5() = default;
@@ -211,6 +216,10 @@
                             msgpack::ArrayDocNode Args) override {
     llvm_unreachable("Dummy override should not be invoked!");
   }
+  void emitKernelAttrs(const Function &Func,
+                       msgpack::MapDocNode Kern) override {
+    llvm_unreachable("Dummy override should not be invoked!");
+  }
 
 public:
   MetadataStreamerYamlV2() = default;
diff --git
a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -1080,6 +1080,21 @@
     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
 }
 
+// Emits the V3 kernel attributes for Func into Kern, then adds the
+// ".uniform_work_group_size" key with value 1. The key is only written when
+// the "uniform-work-group-size" function attribute evaluates to true.
+void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
+                                                msgpack::MapDocNode Kern) {
+  MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
+
+  const bool HasUniformWorkGroupSize =
+      Func.getFnAttribute("uniform-work-group-size").getValueAsBool();
+  if (!HasUniformWorkGroupSize)
+    return;
+
+  Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
+}
+
 } // end namespace HSAMD
 } // end namespace AMDGPU
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck %s
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: - .args:
+; CHECK-LABEL: .name: kernel_uniform_workgroup
+; CHECK: .uniform_work_group_size: 1
+define amdgpu_kernel void @kernel_uniform_workgroup() #0 {
+bb:
+  ret void
+}
+
+; CHECK: - .args:
+; CHECK-LABEL: .name: kernel_non_uniform_workgroup
+; CHECK-NOT: .uniform_work_group_size:
+define amdgpu_kernel void @kernel_non_uniform_workgroup() #1 {
+bb:
+  ret void
+}
+
+; CHECK: - .args:
+; CHECK-LABEL: .name: kernel_no_attr
+; CHECK-NOT: .uniform_work_group_size:
+define amdgpu_kernel void @kernel_no_attr() {
+bb:
+  ret void
+}
+attributes #0 = { "uniform-work-group-size"="true" }
+attributes #1 = { "uniform-work-group-size"="false" }