diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3219,9 +3219,6 @@ arguments in the kernarg segment. Must be a power of 2. - ".uses_dynamic_stack" boolean Indicates if the generated - machine code is using a - dynamically sized stack. ".wavefront_size" integer Required Wavefront size. Must be a power of 2. ".sgpr_count" integer Required Number of scalar @@ -3553,7 +3550,8 @@ Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table -:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5` and table +:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table +:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v5` and table :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-map-table-v5`. .. table:: AMDHSA Code Object V5 Metadata Map Changes @@ -3568,6 +3566,17 @@ version. Currently 2. ================= ============== ========= ======================================= +.. + + .. table:: AMDHSA Code Object V5 Kernel Metadata Map Additions + :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v5 + + ===================== ============= ========== ======================================= + String Key Value Type Required? Description + ===================== ============= ========== ======================================= + ".uses_dynamic_stack" boolean Indicates if the generated machine code + is using a dynamically sized stack. + ===================== ============= ========== ======================================= .. .. table:: AMDHSA Code Object V5 Kernel Argument Metadata Map Additions and Changes @@ -4004,6 +4013,8 @@ 459 1 bit USES_DYNAMIC_STACK Indicates if the generated machine code is using a dynamically sized stack. + This is only set in code + object v5 and later. 463:460 1 bit Reserved, must be 0. 464 1 bit RESERVED_464 Deprecated, must be 0. 467:465 3 bits Reserved, must be 0. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,7 +417,8 @@ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } - if (CurrentProgramInfo.DynamicCallStack) { + if (CurrentProgramInfo.DynamicCallStack && + AMDGPU::getAmdhsaCodeObjectVersion() >= 5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -874,8 +874,9 @@ Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); - Kern[".uses_dynamic_stack"] = - Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); + if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + Kern[".uses_dynamic_stack"] = + Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); // FIXME: The metadata treats the minimum as 16? Kern[".kernarg_segment_align"] = diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2089,8 +2089,9 @@ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", - KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); + if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) return MCDisassembler::Fail; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -367,8 +367,9 @@ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); - PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); + if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); PRINT_FIELD(OS, (hasArchitectedFlatScratch(STI) ? ".amdhsa_enable_private_segment" diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll --- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll @@ -36,7 +36,6 @@ ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 -; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 0 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -65,7 +64,6 @@ ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 -; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -98,7 +96,6 @@ ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 -; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 0 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -145,7 +142,6 @@ ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 -; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll --- a/llvm/test/CodeGen/AMDGPU/recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/recursion.ll @@ -36,6 +36,7 @@ ; ; V5-LABEL: {{^}}calls_recursive: ; V5: .amdhsa_private_segment_fixed_size 0{{$}} +; V5: .amdhsa_uses_dynamic_stack 1 define amdgpu_kernel void @calls_recursive() { call void @recursive() ret void @@ -59,6 +60,7 @@ ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive: ; V5: .amdhsa_private_segment_fixed_size 0{{$}} +; V5: .amdhsa_uses_dynamic_stack 1 define amdgpu_kernel void @kernel_calls_tail_recursive() { call void @tail_recursive() ret void @@ -69,6 +71,7 @@ ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack: ; V5: .amdhsa_private_segment_fixed_size 8{{$}} +; V5: .amdhsa_uses_dynamic_stack 1 define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() { call void @tail_recursive_with_stack() ret void diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll --- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -o - %s | FileCheck -check-prefix=GCN-V5 %s ; Make sure there's no assertion when trying to report the resource ; usage for a function which becomes dead during codegen. @@ -20,7 +21,8 @@ ; GCN: s_endpgm ; GCN: .amdhsa_private_segment_fixed_size 0 -; GCN: .amdhsa_uses_dynamic_stack 0 +; GCN-NOT: .amdhsa_uses_dynamic_stack 0 +; GCN-V5: .amdhsa_uses_dynamic_stack 0 define amdgpu_kernel void @entry() { bb0: br i1 false, label %bb1, label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -26,7 +26,6 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; VI-NEXT: .amdhsa_uses_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -75,7 +74,6 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; GFX9-NEXT: .amdhsa_uses_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -131,7 +129,6 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; VI-NEXT: .amdhsa_uses_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -180,7 +177,6 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; GFX9-NEXT: .amdhsa_uses_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -236,7 +232,6 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; VI-NEXT: .amdhsa_uses_dynamic_stack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -285,7 +280,6 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -; GFX9-NEXT: .amdhsa_uses_dynamic_stack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f0c0000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -91,7 +91,6 @@ .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -135,7 +134,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e0c0000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -87,7 +87,6 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -127,7 +126,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f080000 00000000 +// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f000000 00000000 .text // ASM: .text @@ -77,7 +77,6 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -118,7 +117,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 01510104 130f007f 5e080000 00000000 +// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000 .text // ASM: .text @@ -75,7 +75,6 @@ .amdhsa_user_sgpr_kernarg_segment_ptr 1 .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -113,7 +112,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s @@ -39,7 +39,6 @@ ; CHECK-NEXT: .private_segment_fixed_size: 32 ; CHECK-NEXT: .sgpr_count: 14 ; CHECK-NEXT: .symbol: 'test_kernel@kd' -; CHECK-NEXT: .uses_dynamic_stack: true ; CHECK-NEXT: .vgpr_count: 40 ; CHECK-NEXT: .wavefront_size: 128 ; CHECK-NEXT: amdhsa.printf: @@ -66,7 +65,6 @@ .kernarg_segment_size: 8 .group_segment_fixed_size: 16 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s @@ -20,7 +20,6 @@ // CHECK-NEXT: - 4 // CHECK: .sgpr_count: 14 // CHECK: .symbol: 'test_kernel@kd' -// CHECK: .uses_dynamic_stack: true // CHECK: .vec_type_hint: int // CHECK: .vgpr_count: 40 // CHECK: .wavefront_size: 128 @@ -52,7 +51,6 @@ .kernarg_segment_size: 8 .group_segment_fixed_size: 16 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s @@ -13,7 +13,6 @@ // CHECK: .sgpr_count: 40 // CHECK: .sgpr_spill_count: 1 // CHECK: .symbol: 'test_kernel@kd' -// CHECK: .uses_dynamic_stack: true // CHECK: .vgpr_count: 14 // CHECK: .vgpr_spill_count: 1 // CHECK: .wavefront_size: 64 @@ -33,7 +32,6 @@ .kernarg_segment_size: 24 .group_segment_fixed_size: 24 .private_segment_fixed_size: 16 - .uses_dynamic_stack: true .kernarg_segment_align: 16 .wavefront_size: 64 .max_flat_workgroup_size: 256 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-v3.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -101,7 +101,6 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -141,7 +140,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 @@ -261,7 +259,6 @@ .kernarg_segment_size: 8 .group_segment_fixed_size: 16 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 @@ -272,7 +269,6 @@ .kernarg_segment_size: 8 .group_segment_fixed_size: 16 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 @@ -290,7 +286,6 @@ // ASM: .private_segment_fixed_size: 32 // ASM: .sgpr_count: 14 // ASM: .symbol: 'amd_kernel_code_t_test_all@kd' -// ASM: .uses_dynamic_stack: true // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 // ASM: - .group_segment_fixed_size: 16 @@ -301,7 +296,6 @@ // ASM: .private_segment_fixed_size: 32 // ASM: .sgpr_count: 14 // ASM: .symbol: 'amd_kernel_code_t_minimal@kd' -// ASM: .uses_dynamic_stack: true // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 // ASM: amdhsa.version: diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -102,7 +102,6 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_uses_dynamic_stack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -142,7 +141,6 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_uses_dynamic_stack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 @@ -262,7 +260,6 @@ .kernarg_segment_size: 8 .group_segment_fixed_size: 16 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 @@ -272,7 +269,6 @@ .symbol: amd_kernel_code_t_minimal@kd .kernarg_segment_size: 8 .group_segment_fixed_size: 16 - .uses_dynamic_stack: true .private_segment_fixed_size: 32 .kernarg_segment_align: 64 .wavefront_size: 128 @@ -291,7 +287,6 @@ // ASM: .private_segment_fixed_size: 32 // ASM: .sgpr_count: 14 // ASM: .symbol: 'amd_kernel_code_t_test_all@kd' -// ASM: .uses_dynamic_stack: true // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 // ASM: - .group_segment_fixed_size: 16 @@ -302,7 +297,6 @@ // ASM: .private_segment_fixed_size: 32 // ASM: .sgpr_count: 14 // ASM: .symbol: 'amd_kernel_code_t_minimal@kd' -// ASM: .uses_dynamic_stack: true // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 // ASM: amdhsa.version: diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s copy from llvm/test/MC/AMDGPU/hsa-v4.s copy to llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=5 -mattr=+xnack -filetype=obj < %s > %t // RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s @@ -272,8 +272,8 @@ .symbol: amd_kernel_code_t_minimal@kd .kernarg_segment_size: 8 .group_segment_fixed_size: 16 - .uses_dynamic_stack: true .private_segment_fixed_size: 32 + .uses_dynamic_stack: true .kernarg_segment_align: 64 .wavefront_size: 128 .sgpr_count: 14 diff --git a/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s b/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s --- a/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s +++ b/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s @@ -10,10 +10,10 @@ #LLVM-NEXT: NoteSection { #LLVM-NEXT: Name: .note #LLVM-NEXT: Offset: 0x40 -#LLVM-NEXT: Size: 0x128 +#LLVM-NEXT: Size: 0x110 #LLVM-NEXT: Note { #LLVM-NEXT: Owner: AMDGPU -#LLVM-NEXT: Data size: 0x111 +#LLVM-NEXT: Data size: 0xFC #LLVM-NEXT: Type: NT_AMDGPU_METADATA (AMDGPU Metadata) #LLVM-NEXT: AMDGPU Metadata: --- #LLVM-NEXT: amdhsa.kernels: @@ -25,7 +25,6 @@ #LLVM-NEXT: .private_segment_fixed_size: 32 #LLVM-NEXT: .sgpr_count: 14 #LLVM-NEXT: .symbol: 'test_kernel@kd' -#LLVM-NEXT: .uses_dynamic_stack: true #LLVM-NEXT: .vgpr_count: 40 #LLVM-NEXT: .wavefront_size: 128 #LLVM-NEXT: amdhsa.version: @@ -39,7 +38,7 @@ # GNU: Displaying notes found in: .note # GNU-NEXT: Owner Data size Description -# GNU-NEXT: AMDGPU 0x00000111 NT_AMDGPU_METADATA (AMDGPU Metadata) +# GNU-NEXT: AMDGPU 0x000000fc NT_AMDGPU_METADATA (AMDGPU Metadata) # GNU-NEXT: AMDGPU Metadata: # GNU-NEXT: --- # GNU-NEXT: amdhsa.kernels: @@ -51,7 +50,6 @@ # GNU-NEXT: .private_segment_fixed_size: 32 # GNU-NEXT: .sgpr_count: 14 # GNU-NEXT: .symbol: 'test_kernel@kd' -# GNU-NEXT: .uses_dynamic_stack: true # GNU-NEXT: .vgpr_count: 40 # GNU-NEXT: .wavefront_size: 128 # GNU-NEXT: amdhsa.version: @@ -71,7 +69,6 @@ .kernarg_segment_size: 8 .max_flat_workgroup_size: 256 .private_segment_fixed_size: 32 - .uses_dynamic_stack: true .sgpr_count: 14 .vgpr_count: 40 .wavefront_size: 128