diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -66,6 +66,24 @@
   DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst,
                              uint64_t Address) const;
 
+  //===----------------------------------------------------------------------===//
+  // AMDGPU specific symbol handling
+  //===----------------------------------------------------------------------===//
+
+  DecodeStatus onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+                             ArrayRef<uint8_t> Bytes, uint64_t Address,
+                             raw_ostream &CStream) const;
+
+  DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef<uint8_t> Bytes,
+                                      uint64_t KdAddress) const;
+
+  DecodeStatus decodeKernelDescriptorDirective(size_t &CurrentIndex,
+                                               ArrayRef<uint8_t> Bytes) const;
+
+  DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer) const;
+
+  DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer) const;
+
   DecodeStatus convertSDWAInst(MCInst &MI) const;
   DecodeStatus convertDPP8Inst(MCInst &MI) const;
   DecodeStatus convertMIMGInst(MCInst &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -34,6 +34,7 @@
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -1205,6 +1206,421 @@
   return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
 }
 
+//===----------------------------------------------------------------------===//
+// AMDGPU specific symbol handling
+//===----------------------------------------------------------------------===//
+
+/// Target specific handling at the start of a symbol. Only kernel descriptors
+/// are handled; all other symbols are ignored.
+///
+/// \param Symbol  the symbol being started; its Type and Name select the path.
+/// \param Size    set to 256 on SoftFail and to Bytes.size() on Success; not
+///                written for Fail or Ignore.
+/// \param Bytes   bytes handed to decodeKernelDescriptor.
+/// \param Address address handed to decodeKernelDescriptor.
+/// \param CStream not used here; directives are printed to outs() instead.
+/// \returns SoftFail for STT_AMDGPU_HSA_KERNEL symbols, Success or Fail for
+///          STT_OBJECT symbols named "*.kd", and Ignore for everything else.
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                  raw_ostream &CStream) const {
+  // Right now only kernel descriptor needs to be handled.
+  // We ignore all other symbols for target specific handling.
+  // TODO:
+  // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
+  // Object V2 and V3. It doesn't affect decoding kernel descriptor for V3.
+
+  // amd_kernel_code_t for Code Object V2
+  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+    // Right now this condition will always evaluate to false. The issue with 2
+    // symbols for kernels must be fixed to support this.
+
+    Size = 256;
+    return MCDisassembler::SoftFail;
+  }
+
+  // Code Object V3 kernel descriptors
+  StringRef Name = Symbol.Name;
+  if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
+    if (decodeKernelDescriptor(Name, Bytes, Address) ==
+        MCDisassembler::Success) {
+      Size = Bytes.size();
+      return MCDisassembler::Success;
+    }
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Ignore;
+}
+
+/// Decodes a Code Object V3 kernel descriptor and prints it to outs() as an
+/// .amdhsa_kernel ... .end_amdhsa_kernel block.
+///
+/// \param KdName    descriptor symbol name; its last three characters (the
+///                  ".kd" suffix) are dropped to form the kernel name.
+/// \param Bytes     descriptor bytes; exactly 64 are required.
+/// \param KdAddress descriptor address; must be a multiple of 64.
+/// \returns Success if every field decoded, Fail otherwise. Directives printed
+///          before a failure is detected stay in the output.
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
+    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
+  // CP microcode requires the kernel descriptor to be 64 aligned
+  if (Bytes.size() != 64 || KdAddress % 64 != 0)
+    return MCDisassembler::Fail;
+
+  outs() << ".amdhsa_kernel " << KdName.drop_back(3) << "\n";
+
+  size_t CurrentIndex = 0;
+  while (CurrentIndex < Bytes.size()) {
+    // Each call decodes one field and moves CurrentIndex past it.
+    if (decodeKernelDescriptorDirective(CurrentIndex, Bytes) ==
+        MCDisassembler::Fail)
+      return MCDisassembler::Fail;
+  }
+  outs() << ".end_amdhsa_kernel\n";
+  return MCDisassembler::Success;
+}
+
+/// Decodes the kernel descriptor field that starts at byte \p CurrentIndex and
+/// prints the matching .amdhsa_ directive(s), if any, to outs().
+///
+/// \param CurrentIndex byte offset of the field; moved past the field on
+///                     success.
+/// \param Bytes        kernel descriptor bytes.
+/// \returns Success if the field decoded; Fail if \p CurrentIndex is not the
+///          start of a field, \p Bytes is too short, or a bit/byte that must
+///          be 0 is set.
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::decodeKernelDescriptorDirective(
+    size_t &CurrentIndex, ArrayRef<uint8_t> Bytes) const {
+  uint16_t TwoByteBuffer = 0;
+  uint32_t FourByteBuffer = 0;
+  uint64_t EightByteBuffer = 0;
+
+  StringRef ReservedBytes;
+  const std::string Indent = "\t";
+  const std::string ErrorMessagePrefix = "Invalid kernel descriptor : ";
+
+  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
+
+  // DataExtractor works on uint64_t offsets, and size_t is not the same type
+  // on every host. Read through a local offset and publish it on success.
+  uint64_t Offset = CurrentIndex;
+
+  // True if NumBytes bytes can be read starting at Offset.
+  auto CanRead = [&](uint64_t NumBytes) {
+    return DE.isValidOffsetForDataOfSize(Offset, NumBytes);
+  };
+
+  // Publishes the consumed bytes to the caller and reports success.
+  auto Done = [&]() {
+    CurrentIndex = static_cast<size_t>(Offset);
+    return MCDisassembler::Success;
+  };
+
+  // Prints Directive followed by bit number Bit of TwoByteBuffer.
+  auto PrintFlag = [&](StringRef Directive, unsigned Bit) {
+    outs() << Indent << Directive << " "
+           << ((TwoByteBuffer >> Bit) & (uint32_t)1) << "\n";
+  };
+
+  switch (CurrentIndex) {
+  case 0:
+    if (!CanRead(4))
+      return MCDisassembler::Fail;
+    FourByteBuffer = DE.getU32(&Offset);
+    outs() << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
+           << "\n";
+    return Done();
+
+  case 4: // 0 + 4
+    if (!CanRead(4))
+      return MCDisassembler::Fail;
+    FourByteBuffer = DE.getU32(&Offset);
+    outs() << Indent << ".amdhsa_private_segment_fixed_size " << FourByteBuffer
+           << "\n";
+    return Done();
+
+  case 8: // 4 + 4
+    // 8 reserved bytes, must be 0.
+    if (!CanRead(8))
+      return MCDisassembler::Fail;
+    EightByteBuffer = DE.getU64(&Offset);
+    if (EightByteBuffer) {
+      outs() << Indent << ErrorMessagePrefix
+             << "Bytes 8 to 15 are reserved and must be 0\n";
+      return MCDisassembler::Fail;
+    }
+    return Done();
+
+  case 16: // 8 + 8
+    // KERNEL_CODE_ENTRY_BYTE_OFFSET
+    // So far no directive controls this for Code Object V3, so simply skip for
+    // disassembly.
+    if (!CanRead(8))
+      return MCDisassembler::Fail;
+    Offset += 8;
+    return Done();
+
+  case 24: // 16 + 8
+    // 20 reserved bytes, must be 0.
+    if (!CanRead(20))
+      return MCDisassembler::Fail;
+    ReservedBytes = DE.getBytes(&Offset, 20);
+    for (char Byte : ReservedBytes) {
+      if (Byte != 0) {
+        outs() << Indent << ErrorMessagePrefix
+               << "Bytes 24 to 43 are reserved and must be 0\n";
+        return MCDisassembler::Fail;
+      }
+    }
+    return Done();
+
+  case 44: // 24 + 20
+    // COMPUTE_PGM_RSRC3
+    //  - Only set for GFX10, GFX6-9 have this to be 0
+    //  - Currently no directives directly control this
+    if (!CanRead(4))
+      return MCDisassembler::Fail;
+    FourByteBuffer = DE.getU32(&Offset);
+    if (!isGFX10() && FourByteBuffer) {
+      outs() << Indent << ErrorMessagePrefix
+             << "COMPUTE_PGM_RSRC3 must be 0 for GFX6-9\n";
+      return MCDisassembler::Fail;
+    }
+    return Done();
+
+  case 48: // 44 + 4
+    // COMPUTE_PGM_RSRC1
+    if (!CanRead(4))
+      return MCDisassembler::Fail;
+    FourByteBuffer = DE.getU32(&Offset);
+    if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer) == MCDisassembler::Fail) {
+      outs() << Indent << ErrorMessagePrefix
+             << "COMPUTE_PGM_RSRC1 has invalid bits / bytes\n";
+      return MCDisassembler::Fail;
+    }
+    return Done();
+
+  case 52: // 48 + 4
+    // COMPUTE_PGM_RSRC2
+    if (!CanRead(4))
+      return MCDisassembler::Fail;
+    FourByteBuffer = DE.getU32(&Offset);
+    if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer) == MCDisassembler::Fail) {
+      outs() << Indent << ErrorMessagePrefix
+             << "COMPUTE_PGM_RSRC2 has invalid bits / bytes\n";
+      return MCDisassembler::Fail;
+    }
+    return Done();
+
+  case 56: // 52 + 4
+    // Two bytes of flags, one directive per bit.
+    if (!CanRead(2))
+      return MCDisassembler::Fail;
+    TwoByteBuffer = DE.getU16(&Offset);
+
+    PrintFlag(".amdhsa_user_sgpr_private_segment_buffer", 0);
+    PrintFlag(".amdhsa_user_sgpr_dispatch_ptr", 1);
+    PrintFlag(".amdhsa_user_sgpr_queue_ptr", 2);
+    PrintFlag(".amdhsa_user_sgpr_kernarg_segment_ptr", 3);
+    PrintFlag(".amdhsa_user_sgpr_dispatch_id", 4);
+    PrintFlag(".amdhsa_user_sgpr_flat_scratch_init", 5);
+    PrintFlag(".amdhsa_user_sgpr_private_segment_size", 6);
+
+    // Next 3 bits are reserved, must be 0.
+    if (((TwoByteBuffer >> 7) & (uint32_t)0x7) != 0) {
+      outs() << Indent << ErrorMessagePrefix
+             << "Bits 457:455 are reserved and must be 0\n";
+      return MCDisassembler::Fail;
+    }
+
+    // The wavefront size directive is documented for GFX10 only, so it is not
+    // printed for other targets.
+    if (isGFX10())
+      PrintFlag(".amdhsa_wavefront_size32", 10);
+
+    // Rest of the bits are reserved and must be 0.
+    if (((TwoByteBuffer >> 11) & (uint32_t)0x1F) != 0) {
+      outs() << Indent << ErrorMessagePrefix
+             << "Bits 463:459 are reserved and must be 0\n";
+      return MCDisassembler::Fail;
+    }
+    return Done();
+
+  case 58: // 56 + 2
+    // 6 bytes from here are reserved, must be 0.
+    if (!CanRead(6))
+      return MCDisassembler::Fail;
+    ReservedBytes = DE.getBytes(&Offset, 6);
+    for (char Byte : ReservedBytes) {
+      if (Byte != 0) {
+        outs() << Indent << ErrorMessagePrefix
+               << "Bytes 58 to 63 are reserved and must be 0\n";
+        return MCDisassembler::Fail;
+      }
+    }
+    // 58 + 6 = 64. End of kernel descriptor.
+    return Done();
+
+  default:
+    // Not the start of a field. Report it rather than running off the end of
+    // the function without a return value.
+    return MCDisassembler::Fail;
+  }
+}
+
+/// Prints the .amdhsa_ directives encoded in COMPUTE_PGM_RSRC1 to outs().
+///
+/// \param FourByteBuffer the COMPUTE_PGM_RSRC1 value to decode.
+/// \returns Fail at the first bit that must be 0 but is set (output printed
+///          before that point is kept), Success otherwise.
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer) const {
+  // Decode as directives that handle COMPUTE_PGM_RSRC1
+  const std::string Indent = "\t";
+  const std::string ErrorMessagePrefix = "Invalid COMPUTE_PGM_RSRC1 : ";
+
+  // Prints Directive followed by the field at bit Shift selected by Mask.
+  auto PrintField = [&](StringRef Directive, unsigned Shift, uint32_t Mask) {
+    outs() << Indent << Directive << " " << ((FourByteBuffer >> Shift) & Mask)
+           << "\n";
+  };
+
+  // We can not accurately backward compute #VGPRs used from
+  // GRANULATED_WORKITEM_VGPR_COUNT. So we 'decode' as Code Object V3 predefined
+  // symbol.
+  outs() << Indent << ".amdhsa_next_free_vgpr "
+         << ".amdgcn.next_free_vgpr"
+         << "\n";
+
+  // We can not backward compute values used to calculate
+  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence values for following directives can
+  // not be computed: .amdhsa_reserve_vcc .amdhsa_reserve_flat_scratch
+  // .amdhsa_reserve_xnack_mask
+  // They take default values if not specified in assembly. Hence it is
+  // still round trippable but the re-assembled binary may not be identical.
+  // So we decode as Code Object V3 predefined symbol.
+  outs() << Indent << ".amdhsa_next_free_sgpr "
+         << ".amdgcn.next_free_sgpr\n";
+
+  // 11:10 bits (PRIORITY) must be 0
+  if (((FourByteBuffer >> 10) & (uint32_t)3) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "11:10 bits (PRIORITY) must be 0\n";
+    return MCDisassembler::Fail;
+  }
+  PrintField(".amdhsa_float_round_mode_32", 12, 3);
+  PrintField(".amdhsa_float_round_mode_16_64", 14, 3);
+  PrintField(".amdhsa_float_denorm_mode_32", 16, 3);
+  PrintField(".amdhsa_float_denorm_mode_16_64", 18, 3);
+
+  // 20th bit (PRIV) must be 0
+  if (((FourByteBuffer >> 20) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix << "20th bit (PRIV) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+  PrintField(".amdhsa_dx10_clamp", 21, 1);
+
+  // 22nd bit (DEBUG_MODE) must be 0
+  if (((FourByteBuffer >> 22) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "22nd bit (DEBUG_MODE) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+  PrintField(".amdhsa_ieee_mode", 23, 1);
+
+  // 24th bit (BULKY) must be 0
+  if (((FourByteBuffer >> 24) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix << "24th bit (BULKY) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+  // 25th bit (CDBG_USER) must be 0
+  if (((FourByteBuffer >> 25) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "25th bit (CDBG_USER) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+
+  PrintField(".amdhsa_fp16_overflow", 26, 1);
+
+  // next two bits are reserved and must be 0
+  if (((FourByteBuffer >> 27) & (uint32_t)3) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "Bits 28:27 are reserved and must be 0\n";
+    return MCDisassembler::Fail;
+  }
+
+  // The last three directives are documented for GFX10 only, so they are not
+  // printed for other targets.
+  if (isGFX10()) {
+    PrintField(".amdhsa_workgroup_processor_mode", 29, 1);
+    PrintField(".amdhsa_memory_ordered", 30, 1);
+    PrintField(".amdhsa_forward_progress", 31, 1);
+  }
+  return MCDisassembler::Success;
+}
+
+/// Prints the .amdhsa_ directives encoded in COMPUTE_PGM_RSRC2 to outs().
+///
+/// \param FourByteBuffer the COMPUTE_PGM_RSRC2 value to decode.
+/// \returns Fail at the first bit that must be 0 but is set (output printed
+///          before that point is kept), Success otherwise.
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer) const {
+  // Decode as directives that handle COMPUTE_PGM_RSRC2
+  const std::string ErrorMessagePrefix = "Invalid COMPUTE_PGM_RSRC2 : ";
+  const std::string Indent = "\t";
+
+  // Prints Directive followed by the field at bit Shift selected by Mask.
+  auto PrintField = [&](StringRef Directive, unsigned Shift, uint32_t Mask) {
+    outs() << Indent << Directive << " " << ((FourByteBuffer >> Shift) & Mask)
+           << "\n";
+  };
+
+  PrintField(".amdhsa_system_sgpr_private_segment_wavefront_offset", 0, 1);
+  PrintField(".amdhsa_system_sgpr_workgroup_id_x", 7, 1);
+  PrintField(".amdhsa_system_sgpr_workgroup_id_y", 8, 1);
+  PrintField(".amdhsa_system_sgpr_workgroup_id_z", 9, 1);
+  PrintField(".amdhsa_system_sgpr_workgroup_info", 10, 1);
+  PrintField(".amdhsa_system_vgpr_workitem_id", 11, 3);
+
+  // 13th bit (ENABLE_EXCEPTION_ADDRESS_WATCH) must be 0
+  if (((FourByteBuffer >> 13) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "13th bit (ENABLE_EXCEPTION_ADDRESS_WATCH) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+  // 14th bit (ENABLE_EXCEPTION_MEMORY) must be 0
+  if (((FourByteBuffer >> 14) & (uint32_t)1) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "14th bit (ENABLE_EXCEPTION_MEMORY) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+  // 23:15 bits (GRANULATED_LDS_SIZE) must be 0
+  if (((FourByteBuffer >> 15) & (uint32_t)0x1FF) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "23:15 bits (GRANULATED_LDS_SIZE) must be 0.\n";
+    return MCDisassembler::Fail;
+  }
+
+  PrintField(".amdhsa_exception_fp_ieee_invalid_op", 24, 1);
+  PrintField(".amdhsa_exception_fp_denorm_src", 25, 1);
+  PrintField(".amdhsa_exception_fp_ieee_div_zero", 26, 1);
+  PrintField(".amdhsa_exception_fp_ieee_overflow", 27, 1);
+  PrintField(".amdhsa_exception_fp_ieee_underflow", 28, 1);
+  PrintField(".amdhsa_exception_fp_ieee_inexact", 29, 1);
+  PrintField(".amdhsa_exception_int_div_zero", 30, 1);
+
+  // Last bit. Reserved, must be 0.
+  if ((FourByteBuffer >> 31) != 0) {
+    outs() << Indent << ErrorMessagePrefix
+           << "31st bit is reserved, must be 0\n";
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPUSymbolizer
 //===----------------------------------------------------------------------===//
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1394,22 +1394,29 @@
         outs() << SectionName << ":\n";
       }
 
-      if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
-        if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
-          // Skip amd_kernel_code_t at the begining of kernel symbol (256bytes).
-          Start += 256;
-        }
-        if (SI == SE - 1 ||
-            Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
-          // cut trailing zeroes at the end of kernel
-          // cut up to 256 bytes
-          const uint64_t EndAlign = 256;
-          const auto Limit = End - (std::min)(EndAlign, End - Start);
-          while (End > Limit && *reinterpret_cast<const support::ulittle32_t *>(
-                                    &Bytes[End - 4]) == 0)
-            End -= 4;
-        }
-      }
+      // NOTE(review): the AMDGPU specific handling below is disabled rather
+      // than deleted. Skipping amd_kernel_code_t looks superseded by the
+      // Size = 256 path in AMDGPUDisassembler::onSymbolStart; the trailing
+      // zero trimming has no visible replacement. Confirm before landing.
+      //
+      // if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
+      //   if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+      //     // Skip amd_kernel_code_t at the beginning of kernel symbol
+      //     // (256 bytes).
+      //     Start += 256;
+      //   }
+      //   if (SI == SE - 1 ||
+      //       Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+      //     // cut trailing zeroes at the end of kernel
+      //     // cut up to 256 bytes
+      //     const uint64_t EndAlign = 256;
+      //     const auto Limit = End - (std::min)(EndAlign, End - Start);
+      //     while (End > Limit &&
+      //            *reinterpret_cast<const support::ulittle32_t *>(
+      //                &Bytes[End - 4]) == 0)
+      //       End -= 4;
+      //   }
+      // }
 
       outs() << '\n';
       if (!NoLeadingAddr)