diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3997,7 +3997,10 @@ - If 1 execute in native wavefront size 32 mode. - 463:459 1 bit Reserved, must be 0. + 459 1 bit IS_DYNAMIC_STACK Indicates if the generated + machine code is using a + dynamically sized stack. + 463:460 4 bits Reserved, must be 0. 464 1 bit RESERVED_464 Deprecated, must be 0. 467:465 3 bits Reserved, must be 0. 468 1 bit RESERVED_468 Deprecated, must be 0. @@ -14847,6 +14850,8 @@ Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. Specific (wavefrontsize64) + ``.amdhsa_is_dynamic_stack`` 0 GFX6-GFX11 Controls IS_DYNAMIC_STACK in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx11-table`. GFX940) diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -161,7 +161,8 @@ KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ - KERNEL_CODE_PROPERTY(RESERVED1, 11, 5), + KERNEL_CODE_PROPERTY(IS_DYNAMIC_STACK, 11, 1), + KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), }; #undef KERNEL_CODE_PROPERTY diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -415,6 +415,10 @@ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } + if (CurrentProgramInfo.DynamicCallStack) { + KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_IS_DYNAMIC_STACK; + } + return KernelCodeProperties; } diff --git 
a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5001,6 +5001,9 @@ PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Val, ValRange); + } else if (ID == ".amdhsa_is_dynamic_stack") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_IS_DYNAMIC_STACK, Val, ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1968,6 +1968,9 @@ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } + PRINT_DIRECTIVE(".amdhsa_is_dynamic_stack", + KERNEL_CODE_PROPERTY_IS_DYNAMIC_STACK); + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) return MCDisassembler::Fail; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -367,6 +367,8 @@ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PRINT_FIELD(OS, ".amdhsa_is_dynamic_stack", KD, kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_IS_DYNAMIC_STACK); PRINT_FIELD(OS, (hasArchitectedFlatScratch(STI) ? 
".amdhsa_enable_private_segment" diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -65,6 +65,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_is_dynamic_stack 1 ; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -26,6 +26,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -74,6 +75,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -129,6 +131,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ 
-177,6 +180,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -232,6 +236,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -280,6 +285,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f0c0000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -91,6 +91,7 @@ .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 
.amdhsa_system_sgpr_workgroup_id_y 1 @@ -134,6 +135,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e0c0000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -87,6 +87,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -126,6 +127,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// 
OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f080000 00000000 .text // ASM: .text @@ -77,6 +77,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -117,6 +118,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000 +// OBJDUMP-NEXT: 0070 01510104 130f007f 5e080000 00000000 .text // ASM: .text @@ -75,6 +75,7 @@ .amdhsa_user_sgpr_kernarg_segment_ptr 1 .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -112,6 +113,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s 
--- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-v3.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -101,6 +101,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -140,6 +141,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -102,6 +102,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 
.amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -141,6 +142,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1