diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3997,7 +3997,10 @@ - If 1 execute in native wavefront size 32 mode. - 463:459 1 bit Reserved, must be 0. + 459 1 bit IS_DYNAMIC_CALLSTACK Indicates if the generated + machine code is using a + dynamically sized call stack. + 463:460 1 bit Reserved, must be 0. 464 1 bit RESERVED_464 Deprecated, must be 0. 467:465 3 bits Reserved, must be 0. 468 1 bit RESERVED_468 Deprecated, must be 0. @@ -14847,6 +14850,8 @@ Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. Specific (wavefrontsize64) + ``.amdhsa_is_dynamic_callstack`` 0 GFX6-GFX11 Controls IS_DYNAMIC_CALLSTACK in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx11-table`. GFX940) diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,6 +162,7 @@ KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ KERNEL_CODE_PROPERTY(RESERVED1, 11, 5), + KERNEL_CODE_PROPERTY(IS_DYNAMIC_CALLSTACK, 20, 1), }; #undef KERNEL_CODE_PROPERTY @@ -176,8 +177,8 @@ uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+ uint32_t compute_pgm_rsrc1; uint32_t compute_pgm_rsrc2; - uint16_t kernel_code_properties; - uint8_t reserved2[6]; + uint32_t kernel_code_properties; + uint8_t reserved2[4]; }; enum : uint32_t { @@ -191,7 +192,7 @@ COMPUTE_PGM_RSRC1_OFFSET = 48, COMPUTE_PGM_RSRC2_OFFSET = 52, KERNEL_CODE_PROPERTIES_OFFSET = 56, - RESERVED2_OFFSET = 58, + RESERVED2_OFFSET = 60, }; static_assert( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -70,8 +70,7 @@ uint64_t CodeSize, const AMDGPUMachineFunction* MFI); - uint16_t getAmdhsaKernelCodeProperties( - const MachineFunction &MF) const; + uint32_t getAmdhsaKernelCodeProperties(const MachineFunction &MF) const; amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor( const MachineFunction &MF, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -381,10 +381,10 @@ false); } -uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( +uint32_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo(); - uint16_t KernelCodeProperties = 0; + uint32_t KernelCodeProperties = 0; if (MFI.hasPrivateSegmentBuffer()) { KernelCodeProperties |= @@ -415,6 +415,10 @@ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } + if (CurrentProgramInfo.DynamicCallStack) { + KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK; + } + return KernelCodeProperties; } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5001,6 +5001,10 @@ PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Val, ValRange); + } else if (ID == ".amdhsa_is_dynamic_callstack") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK, Val, + ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1841,19 +1841,10 @@ return MCDisassembler::Success; } -#undef PRINT_DIRECTIVE - MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptorDirective( DataExtractor::Cursor &Cursor, ArrayRef Bytes, raw_string_ostream &KdStream) const { -#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ - do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ - } while (0) - - uint16_t TwoByteBuffer = 0; uint32_t FourByteBuffer = 0; StringRef ReservedBytes; @@ -1936,7 +1927,7 @@ case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: using namespace amdhsa; - TwoByteBuffer = DE.getU16(Cursor); + FourByteBuffer = DE.getU32(Cursor); if (!hasArchitectedFlatScratch()) PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", @@ -1955,27 +1946,30 @@ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) + if (FourByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) return MCDisassembler::Fail; // Reserved for GFX9 if (isGFX9() && - (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + (FourByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { return MCDisassembler::Fail; } else if (isGFX10Plus()) { PRINT_DIRECTIVE(".amdhsa_wavefront_size32", KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + PRINT_DIRECTIVE(".amdhsa_is_dynamic_callstack", + KERNEL_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); + + if (FourByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) return MCDisassembler::Fail; return MCDisassembler::Success; case amdhsa::RESERVED2_OFFSET: // 6 bytes from here are reserved, must be 0. - ReservedBytes = DE.getBytes(Cursor, 6); - for (int I = 0; I < 6; ++I) { + ReservedBytes = DE.getBytes(Cursor, 4); + for (int I = 0; I < 4; ++I) { if (ReservedBytes[I] != 0) return MCDisassembler::Fail; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -367,6 +367,8 @@ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PRINT_FIELD(OS, ".amdhsa_is_dynamic_callstack", KD, kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); PRINT_FIELD(OS, (hasArchitectedFlatScratch(STI) ? ".amdhsa_enable_private_segment" @@ -895,7 +897,7 @@ Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3); Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); - Streamer.emitInt16(KernelDescriptor.kernel_code_properties); + Streamer.emitInt32(KernelDescriptor.kernel_code_properties); for (uint8_t Res : KernelDescriptor.reserved2) Streamer.emitInt8(Res); } diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -65,6 +65,7 @@ ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_is_dynamic_callstack 1 ; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -26,6 +26,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_callstack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -74,6 +75,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_callstack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -129,6 +131,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_callstack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -177,6 +180,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_callstack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -232,6 +236,7 @@ ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; VI-NEXT: .amdhsa_is_dynamic_callstack 0 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -280,6 +285,7 @@ ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; GFX9-NEXT: .amdhsa_is_dynamic_callstack 0 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f041000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -91,6 +91,7 @@ .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -134,6 +135,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e041000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -87,6 +87,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 .amdhsa_wavefront_size32 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -126,6 +127,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 // ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f001000 00000000 .text // ASM: .text @@ -77,6 +77,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -117,6 +118,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000 +// OBJDUMP-NEXT: 0070 01510104 130f007f 5e001000 00000000 .text // ASM: .text @@ -75,6 +75,7 @@ .amdhsa_user_sgpr_kernarg_segment_ptr 1 .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_enable_private_segment 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -112,6 +113,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_enable_private_segment 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-v3.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f001000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -101,6 +101,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -140,6 +141,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -34,7 +34,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f001000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -102,6 +102,7 @@ .amdhsa_user_sgpr_dispatch_id 1 .amdhsa_user_sgpr_flat_scratch_init 1 .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_is_dynamic_callstack 1 .amdhsa_system_sgpr_private_segment_wavefront_offset 1 .amdhsa_system_sgpr_workgroup_id_x 0 .amdhsa_system_sgpr_workgroup_id_y 1 @@ -141,6 +142,7 @@ // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_is_dynamic_callstack 1 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1