diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -9,6 +9,13 @@ /// \file /// AMDHSA kernel descriptor definitions. For more information, visit /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor +/// +/// \warning +/// Any changes to this file should also be audited for corresponding changes +/// needed in both the assembler and disassembler, namely: +/// * AMDGPUAsmPrinter.{cpp,h} +/// * AMDGPUTargetStreamer.{cpp,h} +/// * AMDGPUDisassembler.{cpp,h} // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -156,6 +156,13 @@ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + /// Decode as directives that handle COMPUTE_PGM_RSRC3. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3. + /// \param KdStream - Stream to write the disassembled directives to. + // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertEXPInst(MCInst &MI) const; DecodeStatus convertVINTERPInst(MCInst &MI) const; DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1806,10 +1806,10 @@ //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// +#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK)) #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \ } while (0) // NOLINTNEXTLINE(readability-identifier-naming) @@ -1824,8 +1824,7 @@ // simply calculate the inverse of what the assembler does. uint32_t GranulatedWorkitemVGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT); uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); @@ -1852,8 +1851,7 @@ // The disassembler cannot recover the original values of those 3 directives. uint32_t GranulatedWavefrontSGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT); if (isGFX10Plus() && GranulatedWavefrontSGPRCount) return MCDisassembler::Fail; @@ -1963,7 +1961,17 @@ return MCDisassembler::Success; } +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + if (!isGFX10Plus() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} #undef PRINT_DIRECTIVE +#undef GET_FIELD MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptorDirective( @@ -2031,30 +2039,16 @@ return MCDisassembler::Success; case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: - // COMPUTE_PGM_RSRC3 - // - Only set for GFX10, GFX6-9 have this to be 0. - // - Currently no directives directly control this. FourByteBuffer = DE.getU32(Cursor); - if (!isGFX10Plus() && FourByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream); case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: using namespace amdhsa; diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -1,26 +1,51 @@ ;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - +; RUN: rm -rf %t && split-file %s %t && cd %t ;--- 1.s ;; Only set next_free_sgpr. -.amdhsa_kernel my_kernel_1 +; RUN: amdgpu-assemble -mcpu=gfx908 <1.s >1.o +; RUN: amdgpu-disassemble-kd 1.o | amdgpu-tee-kd 1-disasm.s | FileCheck 1.s +; RUN: amdgpu-assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o +; RUN: diff 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 48 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 42 .amdhsa_reserve_flat_scratch 0 @@ -30,7 +55,48 @@ ;--- 2.s ;; Only set other directives. -.amdhsa_kernel my_kernel_2 +; RUN: amdgpu-assemble -mcpu=gfx908 <2.s >2.o +; RUN: amdgpu-disassemble-kd 2.o | amdgpu-tee-kd 2-disasm.s | FileCheck 2.s +; RUN: amdgpu-assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o +; RUN: diff 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 0 .amdhsa_reserve_flat_scratch 1 @@ -40,7 +106,48 @@ ;--- 3.s ;; Set all affecting directives. -.amdhsa_kernel my_kernel_3 +; RUN: amdgpu-assemble -mcpu=gfx908 <3.s >3.o +; RUN: amdgpu-disassemble-kd 3.o | amdgpu-tee-kd 3-disasm.s | FileCheck 3.s +; RUN: amdgpu-assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o +; RUN: diff 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 48 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 35 .amdhsa_reserve_flat_scratch 1 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -1,36 +1,144 @@ ;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble +; RUN: rm -rf %t && split-file %s %t && cd %t ;--- 1.s -.amdhsa_kernel my_kernel_1 +; RUN: amdgpu-assemble -mcpu=gfx908 <1.s >1.o +; RUN: amdgpu-disassemble-kd 1.o | amdgpu-tee-kd 1-disasm.s | FileCheck 1.s +; RUN: amdgpu-assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o +; RUN: diff 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 24 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 23 .amdhsa_next_free_sgpr 0 .end_amdhsa_kernel ;--- 2.s -.amdhsa_kernel my_kernel_2 +; RUN: amdgpu-assemble -mcpu=gfx908 <2.s >2.o +; RUN: amdgpu-disassemble-kd 2.o | amdgpu-tee-kd 2-disasm.s | FileCheck 2.s +; RUN: amdgpu-assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o +; RUN: diff 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 16 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 14 .amdhsa_next_free_sgpr 0 .end_amdhsa_kernel ;--- 3.s -.amdhsa_kernel my_kernel_3 +; RUN: amdgpu-assemble -mcpu=gfx908 <3.s >3.o +; RUN: amdgpu-disassemble-kd 3.o | amdgpu-tee-kd 3-disasm.s | FileCheck 3.s +; RUN: amdgpu-assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o +; RUN: diff 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 0 .end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg @@ -1,2 +1,6 @@ if not 'AMDGPU' in config.root.targets: config.unsupported = True + +config.substitutions.append(('amdgpu-disassemble-kd', 'llvm-objdump --disassemble-symbols=kernel.kd')) +config.substitutions.append(('amdgpu-tee-kd', 'tail -n +7 | tee')) +config.substitutions.append(('amdgpu-assemble', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj'))