diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3571,12 +3571,14 @@ .. table:: AMDHSA Code Object V5 Kernel Metadata Map Additions :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v5 - ===================== ============= ========== ======================================= - String Key Value Type Required? Description - ===================== ============= ========== ======================================= - ".uses_dynamic_stack" boolean Indicates if the generated machine code - is using a dynamically sized stack. - ===================== ============= ========== ======================================= + ============================= ============= ========== ======================================= + String Key Value Type Required? Description + ============================= ============= ========== ======================================= + ".uses_dynamic_stack" boolean Indicates if the generated machine code + is using a dynamically sized stack. + ".workgroup_processor_mode" integer (GFX10+) Controls ENABLE_WGP_MODE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ============================= ============= ========== ======================================= .. 
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -262,6 +262,8 @@ if (!verifyScalarEntry(KernelMap, ".uses_dynamic_stack", false, msgpack::Type::Boolean)) return false; + if (!verifyIntegerEntry(KernelMap, ".workgroup_processor_mode", false)) + return false; if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true)) return false; if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -877,6 +877,9 @@ if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); + if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP()) + Kern[".workgroup_processor_mode"] = + Kern.getDocument()->getNode(ProgramInfo.WgpMode); // FIXME: The metadata treats the minimum as 16? Kern[".kernarg_segment_align"] = diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -298,6 +298,8 @@ /// the original value. 
bool zeroesHigh16BitsOfDest(unsigned Opcode) const; + bool supportsWGP() const { return getGeneration() >= GFX10; } + bool hasIntClamp() const { return HasIntClamp; } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10-CU %s + +; GFX10: .amdhsa_workgroup_processor_mode 0 +; GFX10: .workgroup_processor_mode: 0 +; GFX10-CU: .amdhsa_workgroup_processor_mode 1 +; GFX10-CU: .workgroup_processor_mode: 1 + +define amdgpu_kernel void @wavefrontsize() { +entry: + ret void +}