diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5237,7 +5237,7 @@ if (IVersion.Major >= 10) { // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS - if (SharedVGPRCount && EnableWavefrontSize32) { + if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { return TokError("shared_vgpr_count directive not valid on " "wavefront size 32"); } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -25,6 +25,7 @@ namespace llvm { +class MCAsmInfo; class MCInst; class MCOperand; class MCSubtargetInfo; @@ -92,10 +93,12 @@ private: std::unique_ptr<MCInstrInfo const> const MCII; const MCRegisterInfo &MRI; + const MCAsmInfo &MAI; const unsigned TargetMaxInstBytes; mutable ArrayRef<uint8_t> Bytes; mutable uint32_t Literal; mutable bool HasLiteral; + mutable std::optional<bool> EnableWavefrontSize32; public: AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -45,11 +45,9 @@ using DecodeStatus = llvm::MCDisassembler::DecodeStatus; AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, - MCContext &Ctx, - MCInstrInfo const *MCII) : - MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), - TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) { - + MCContext &Ctx, MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), + MAI(*Ctx.getAsmInfo()),
TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) { // ToDo: AMDGPUDisassembler supports only VI ISA. if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus()) report_fatal_error("Disassembly not yet supported for subtarget"); @@ -1632,6 +1630,11 @@ do { \ KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \ } while (0) +#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \ + << GET_FIELD(MASK) << '\n'; \ + } while (0) // NOLINTNEXTLINE(readability-identifier-naming) MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( @@ -1647,8 +1650,9 @@ uint32_t GranulatedWorkitemVGPRCount = GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT); - uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * - AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + uint32_t NextFreeVGPR = + (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32); KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; @@ -1786,11 +1790,42 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { using namespace amdhsa; - if (!isGFX10Plus() && FourByteBuffer) { + StringRef Indent = "\t"; + if (isGFX90A()) { + KdStream << Indent << ".amdhsa_accum_offset " + << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4 + << '\n'; + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0) + return MCDisassembler::Fail; + PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1) + return MCDisassembler::Fail; + } else if (isGFX10Plus()) { + // The assembler rejects shared_vgpr_count under wavefront size 32, so in + // that mode it is emitted only as a comment to keep output assemblable. + if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { + PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", + COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + } else { + PRINT_PSEUDO_DIRECTIVE_COMMENT( + "SHARED_VGPR_COUNT",
COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + } + PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", + COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", + COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", + COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0) + return MCDisassembler::Fail; + PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", + COMPUTE_PGM_RSRC3_GFX10_PLUS_IMAGE_OP); + } else if (FourByteBuffer) { return MCDisassembler::Fail; } return MCDisassembler::Success; } +#undef PRINT_PSEUDO_DIRECTIVE_COMMENT #undef PRINT_DIRECTIVE #undef GET_FIELD @@ -1935,6 +1970,20 @@ if (Bytes.size() != 64 || KdAddress % 64 != 0) return MCDisassembler::Fail; + // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10 + // requires us to know the setting of .amdhsa_wavefront_size32 in order to + // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong + // order. Work around this by first looking up .amdhsa_wavefront_size32 here + // when required. + if (isGFX10Plus()) { + uint16_t KernelCodeProperties = + support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET], + support::endianness::little); + EnableWavefrontSize32 = + AMDHSA_BITS_GET(KernelCodeProperties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + std::string Kd; raw_string_ostream KdStream(Kd); KdStream << ".amdhsa_kernel " << KdName << '\n'; diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s @@ -0,0 +1,232 @@ +;; Test disassembly for gfx10 kernel descriptor.
+ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1010 < 1.s > 1.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1010 < 1-disasm.s > 1-disasm.o +; RUN: cmp 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: ; SHARED_VGPR_COUNT 0 +; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: ; TRAP_ON_START 0 +; CHECK-NEXT: ; TRAP_ON_END 0 +; CHECK-NEXT: ; IMAGE_OP 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1 +; CHECK-NEXT: .amdhsa_memory_ordered 1 +; CHECK-NEXT: .amdhsa_forward_progress 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: 
.amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_wavefront_size32 1 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 + .amdhsa_wavefront_size32 1 +.end_amdhsa_kernel + +;--- 2.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 2.s > 2.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 2-disasm.s > 2-disasm.o +; RUN: cmp 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_shared_vgpr_count 0 +; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: ; TRAP_ON_START 0 +; CHECK-NEXT: ; TRAP_ON_END 0 +; CHECK-NEXT: ; IMAGE_OP 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: 
.amdhsa_workgroup_processor_mode 1 +; CHECK-NEXT: .amdhsa_memory_ordered 1 +; CHECK-NEXT: .amdhsa_forward_progress 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 + .amdhsa_shared_vgpr_count 0 +.end_amdhsa_kernel + +;--- 3.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 3.s > 3.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 3-disasm.s > 3-disasm.o +; RUN: cmp 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: 
.amdhsa_shared_vgpr_count 1 +; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: ; TRAP_ON_START 0 +; CHECK-NEXT: ; TRAP_ON_END 0 +; CHECK-NEXT: ; IMAGE_OP 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1 +; CHECK-NEXT: .amdhsa_memory_ordered 1 +; CHECK-NEXT: .amdhsa_forward_progress 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 + 
.amdhsa_shared_vgpr_count 1 +.end_amdhsa_kernel + +;--- 4.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 4.s > 4.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 4.o | tail -n +7 | tee 4-disasm.s | FileCheck 4.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 4-disasm.s > 4-disasm.o +; RUN: cmp 4.o 4-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_shared_vgpr_count 1 +; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: ; TRAP_ON_START 0 +; CHECK-NEXT: ; TRAP_ON_END 0 +; CHECK-NEXT: ; IMAGE_OP 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1 +; CHECK-NEXT: .amdhsa_memory_ordered 1 +; CHECK-NEXT: .amdhsa_forward_progress 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: 
.amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_wavefront_size32 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 + .amdhsa_shared_vgpr_count 1 + .amdhsa_wavefront_size32 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s @@ -0,0 +1,153 @@ +;; Test disassembly for gfx90a kernel descriptor. 
+ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 1.s > 1.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 1-disasm.s > 1-disasm.o +; RUN: cmp 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_accum_offset 4 +; CHECK-NEXT: .amdhsa_tg_split 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 8 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: 
.amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +;--- 2.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 2.s > 2.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 2-disasm.s > 2-disasm.o +; RUN: cmp 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_accum_offset 8 +; CHECK-NEXT: .amdhsa_tg_split 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: 
.amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 8 +.end_amdhsa_kernel + +;--- 3.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 3.s > 3.o +; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s +; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 3-disasm.s > 3-disasm.o +; RUN: cmp 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_accum_offset 12 +; CHECK-NEXT: .amdhsa_tg_split 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: 
.amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 12 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -1,7 +1,7 @@ ;; Entirely zeroed kernel descriptor (for GFX10). ; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t -; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s +; RUN: llvm-objdump -s -d -j .text %t | FileCheck --check-prefix=OBJDUMP %s ;; TODO: ;; This file and kd-zeroed-raw.s should produce the same output for the kernel @@ -11,10 +11,62 @@ ;; Check the raw bytes right now. 
-; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-LABEL: Contents of section .text: +; OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 ; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 +; OBJDUMP-EMPTY: + +; OBJDUMP-LABEL: Disassembly of section .text: +; OBJDUMP-EMPTY: +; OBJDUMP-NEXT: 0000000000000000 <my_kernel.kd>: +; OBJDUMP-NEXT: .amdhsa_kernel my_kernel +; OBJDUMP-NEXT: .amdhsa_group_segment_fixed_size 0 +; OBJDUMP-NEXT: .amdhsa_private_segment_fixed_size 0 +; OBJDUMP-NEXT: .amdhsa_kernarg_size 0 +; OBJDUMP-NEXT: .amdhsa_shared_vgpr_count 0 +; OBJDUMP-NEXT: ; INST_PREF_SIZE 0 +; OBJDUMP-NEXT: ; TRAP_ON_START 0 +; OBJDUMP-NEXT: ; TRAP_ON_END 0 +; OBJDUMP-NEXT: ; IMAGE_OP 0 +; OBJDUMP-NEXT: .amdhsa_next_free_vgpr 8 +; OBJDUMP-NEXT: .amdhsa_reserve_vcc 0 +; OBJDUMP-NEXT: .amdhsa_reserve_flat_scratch 0 +; OBJDUMP-NEXT: .amdhsa_reserve_xnack_mask 0 +; OBJDUMP-NEXT: .amdhsa_next_free_sgpr 8 +; OBJDUMP-NEXT: .amdhsa_float_round_mode_32 0 +; OBJDUMP-NEXT: .amdhsa_float_round_mode_16_64 0 +; OBJDUMP-NEXT: .amdhsa_float_denorm_mode_32 0 +; OBJDUMP-NEXT: .amdhsa_float_denorm_mode_16_64 0 +; OBJDUMP-NEXT: .amdhsa_dx10_clamp 0 +; OBJDUMP-NEXT: .amdhsa_ieee_mode 0 +; OBJDUMP-NEXT: .amdhsa_fp16_overflow 0 +; OBJDUMP-NEXT: .amdhsa_workgroup_processor_mode 0 +; OBJDUMP-NEXT: .amdhsa_memory_ordered 0 +; OBJDUMP-NEXT: .amdhsa_forward_progress 0 +; OBJDUMP-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 +; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; OBJDUMP-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; OBJDUMP-NEXT: .amdhsa_exception_fp_denorm_src 0 +; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; OBJDUMP-NEXT:
.amdhsa_exception_fp_ieee_overflow 0 +; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; OBJDUMP-NEXT: .amdhsa_exception_int_div_zero 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; OBJDUMP-NEXT: .amdhsa_wavefront_size32 0 +; OBJDUMP-NEXT: .end_amdhsa_kernel .amdhsa_kernel my_kernel .amdhsa_group_segment_fixed_size 0 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1718,8 +1718,9 @@ // distance to the next symbol, and sometimes it will be just a // prologue and we should start disassembling instructions from where // it left off. - outs() << "// Error in decoding " << SymNamesHere[SHI] - << " : Decoding failed region as bytes.\n"; + outs() << Ctx.getAsmInfo()->getCommentString() + << " error in decoding " << SymNamesHere[SHI] + << " : decoding failed region as bytes.\n"; for (uint64_t I = 0; I < Size; ++I) { outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) << "\n";