Index: lib/Target/AMDGPU/Disassembler/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/Disassembler/CMakeLists.txt
+++ lib/Target/AMDGPU/Disassembler/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMAMDGPUDisassembler
   AMDGPUDisassembler.cpp
+  HSAObjCodeDisassembler.cpp
   )
 
 add_dependencies(LLVMAMDGPUDisassembler AMDGPUCommonTableGen)
+add_dependencies(LLVMAMDGPUDisassembler LLVMAMDGPUUtils)
Index: lib/Target/AMDGPU/Disassembler/HSAObjCodeDisassembler.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/Disassembler/HSAObjCodeDisassembler.cpp
@@ -0,0 +1,524 @@
+//===--------------------- HSAObjCodeDisassembler.cpp --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Disassembly of an HSA Code Object file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDKernelCodeTUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+
+using namespace llvm;
+using namespace object;
+
+// AMD GPU Note Type Enumeration Values.
+#define NT_AMDGPU_HSA_CODE_OBJECT_VERSION 1
+#define NT_AMDGPU_HSA_HSAIL 2
+#define NT_AMDGPU_HSA_ISA 3
+#define NT_AMDGPU_HSA_PRODUCER 4
+#define NT_AMDGPU_HSA_PRODUCER_OPTIONS 5
+#define NT_AMDGPU_HSA_EXTENSION 6
+#define NT_AMDGPU_HSA_HLDEBUG_DEBUG 101
+#define NT_AMDGPU_HSA_HLDEBUG_TARGET 102
+
+LLVM_PACKED_START
+
+typedef struct amdgpu_hsa_note_code_object_version_s {
+  uint32_t major_version;
+  uint32_t minor_version;
+} amdgpu_hsa_note_code_object_version_t;
+
+typedef struct amdgpu_hsa_note_isa_s {
+  uint16_t vendor_name_size;
+  uint16_t architecture_name_size;
+  uint32_t major;
+  uint32_t minor;
+  uint32_t stepping;
+  char vendor_and_architecture_name[1];
+} amdgpu_hsa_note_isa_t;
+
+LLVM_PACKED_END
+
+StringRef getVendorName(const amdgpu_hsa_note_isa_t &ISA) {
+  return StringRef(ISA.vendor_and_architecture_name,
+                   ISA.vendor_name_size - 1);
+}
+
+StringRef getArchName(const amdgpu_hsa_note_isa_t &ISA) {
+  return StringRef(ISA.vendor_and_architecture_name + ISA.vendor_name_size,
+                   ISA.architecture_name_size - 1);
+}
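+
+// Drops trailing zero elements from A, but no more than Limit of them; used
+// below to strip what is presumably just zero padding emitted after a
+// kernel's last instruction.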
+template <typename T>
+static ArrayRef<T> trimTrailingZeroes(ArrayRef<T> A, size_t Limit) {
+  const auto SizeLimit = (Limit < A.size()) ? (A.size() - Limit) : 0;
+  while (A.size() > SizeLimit && !A.back())
+    A = A.drop_back();
+  return A;
+}
+
+// TODO: Move this to ArrayRef.h
+template <typename NewT, typename OldT>
+ArrayRef<NewT> makeArrayRef(ArrayRef<OldT> Ref) {
+  const auto NumBytes = Ref.size() * sizeof(OldT);
+  assert(0 == (NumBytes % sizeof(NewT)));
+  return makeArrayRef((const NewT *)Ref.data(), NumBytes / sizeof(NewT));
+}
+
+// TODO: Move this to the ELF headers.
+struct ELFNote {
+  uint32_t namesz;
+  uint32_t descsz;
+  uint32_t type;
+
+  ELFNote() = delete;
+  ELFNote(const ELFNote &) = delete;
+  void operator=(const ELFNote &) = delete;
+
+  enum { ALIGN = 4 };
+
+  const char *name() const {
+    return reinterpret_cast<const char *>(this) + sizeof(*this);
+  }
+  const char *desc() const {
+    return name() + alignTo(namesz, ALIGN);
+  }
+  template <typename D>
+  ErrorOr<const D &> as() const {
+    if (descsz < sizeof(D))
+      return make_error_code(object_error::parse_failed);
+    return *reinterpret_cast<const D *>(desc());
+  }
+  size_t size() const {
+    return sizeof(*this) + alignTo(namesz, ALIGN) + descsz;
+  }
+};
+
+const ELFNote *getNext(const ELFNote &N) {
+  return reinterpret_cast<const ELFNote *>(
+    N.desc() + alignTo(N.descsz, ELFNote::ALIGN));
+}
+
+// TODO: move this template somewhere to include/object
+template <typename Item>
+class const_varsize_item_iterator
+    : public std::iterator<std::forward_iterator_tag, Item> {
+  ArrayRef<uint8_t> Ref;
+
+  const Item *item() const {
+    return reinterpret_cast<const Item *>(Ref.data());
+  }
+  size_t getItemPadSize() const {
+    assert(Ref.size() >= sizeof(Item));
+    return (const uint8_t *)getNext(*item()) - (const uint8_t *)item();
+  }
+
+public:
+  const_varsize_item_iterator() {}
+  const_varsize_item_iterator(ArrayRef<uint8_t> Ref_) : Ref(Ref_) {}
+
+  bool valid() const {
+    return Ref.size() >= sizeof(Item) && Ref.size() >= getItemPadSize();
+  }
+
+  ErrorOr<const Item &> operator*() const {
+    if (!valid())
+      return make_error_code(object_error::parse_failed);
+    return *item();
+  }
+
+  bool operator==(const const_varsize_item_iterator &Other) const {
+    return (Ref.size() == Other.Ref.size()) &&
+           (Ref.empty() || Ref.data() == Other.Ref.data());
+  }
+
+  bool operator!=(const const_varsize_item_iterator &Other) const {
+    return !(*this == Other);
+  }
+
+  const_varsize_item_iterator &operator++() { // preincrement
+    Ref = Ref.slice(Ref.size() > sizeof(Item) ?
+                    (std::min)(getItemPadSize(), Ref.size()) :
+                    Ref.size());
+    return *this;
+  }
+};
+
+class HSACodeObject {
+public:
+  typedef ELF64LEObjectFile MyELF;
+  typedef MyELF::Elf_Sym Elf_Sym;
+  const MyELF &Obj;
+
+  HSACodeObject(const MyELF *Obj_) : Obj(*Obj_) {}
+
+  auto symbols() const -> decltype(Obj.symbols()) { return Obj.symbols(); }
+
+  const Elf_Sym *getELFSymbol(SymbolRef Symbol) const {
+    return Obj.getSymbol(Symbol.getRawDataRefImpl());
+  }
+
+  ArrayRef<uint8_t> getSectionContentsAsArray(uint32_t SecIdx) const {
+    auto ELF = Obj.getELFFile();
+    // TODO: check ErrorOr
+    return *ELF->getSectionContentsAsArray<uint8_t>(*ELF->getSection(SecIdx));
+  }
+
+  int getSectionIdx(StringRef SecName) const;
+
+  typedef const_varsize_item_iterator<ELFNote> const_elf_note_iterator;
+
+  iterator_range<const_elf_note_iterator> notes() const {
+    const int Idx = getSectionIdx(".note");
+    return Idx >= 0 ? notes(Idx) :
+      make_range(const_elf_note_iterator(), const_elf_note_iterator());
+  }
+
+  iterator_range<const_elf_note_iterator> notes(int SecIdx) const {
+    const auto SecData = getSectionContentsAsArray(SecIdx);
+    return make_range(const_elf_note_iterator(SecData),
+                      const_elf_note_iterator());
+  }
+
+  const Elf_Sym *toKernelSym(SymbolRef Symbol) const {
+    auto ElfSym = getELFSymbol(Symbol);
+    return (ElfSym->getType() == ELF::STT_AMDGPU_HSA_KERNEL) ? ElfSym
+                                                             : nullptr;
+  }
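+
+  // An STT_AMDGPU_HSA_KERNEL symbol's st_value points at the kernel's
+  // amd_kernel_code_t header; the ISA itself starts at
+  // st_value + kernel_code_entry_byte_offset within the same section.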
+
+  const amd_kernel_code_t *getAMDKernelCodeT(const Elf_Sym *ElfSym) const {
+    assert(ElfSym->getType() == ELF::STT_AMDGPU_HSA_KERNEL);
+    const auto SecBytes = getSectionContentsAsArray(ElfSym->st_shndx);
+    const uint64_t Ofs = ElfSym->getValue();
+    return reinterpret_cast<const amd_kernel_code_t *>(SecBytes.data() + Ofs);
+  }
+
+  uint64_t getKernelStartOffset(const Elf_Sym *ElfSym) const {
+    assert(ElfSym->getType() == ELF::STT_AMDGPU_HSA_KERNEL);
+    return ElfSym->getValue() +
+           getAMDKernelCodeT(ElfSym)->kernel_code_entry_byte_offset;
+  }
+
+  ArrayRef<uint32_t> getKernelCode(const Elf_Sym *ElfSym) const;
+
+private:
+  mutable SmallVector<uint64_t, 8> StartMarkers;
+
+  const decltype(StartMarkers) &getStartMarkers() const;
+};
+
+int HSACodeObject::getSectionIdx(StringRef SecName) const {
+  auto ELF = Obj.getELFFile();
+  int Idx = 0;
+  for (auto S : ELF->sections()) {
+    // TODO: handle ELF error
+    if (*ELF->getSectionName(&S) == SecName)
+      return Idx;
+    ++Idx;
+  }
+  return -1;
+}
+
+ArrayRef<uint32_t>
+HSACodeObject::getKernelCode(const Elf_Sym *ElfSym) const {
+  assert(ElfSym->getType() == ELF::STT_AMDGPU_HSA_KERNEL);
+
+  auto &M = getStartMarkers();
+  // TODO: check CodeStart/CodeEnd alignment
+  const uint64_t CodeStart = getKernelStartOffset(ElfSym);
+  const uint64_t CodeEnd = *std::upper_bound(M.begin(), M.end(), CodeStart);
+
+  auto SecBytes = getSectionContentsAsArray(ElfSym->st_shndx);
+  return makeArrayRef<uint32_t>(SecBytes.slice(CodeStart,
+                                               CodeEnd - CodeStart));
+}
+
+// StartMarkers is a sorted array of the offsets at which every entity in the
+// code section begins. We determine where an entity ends by taking the upper
+// bound of its start offset in this array, because by design the code object
+// does not record a kernel's code size.
+const decltype(HSACodeObject::StartMarkers) &
+HSACodeObject::getStartMarkers() const {
+  if (!StartMarkers.empty()) return StartMarkers;
+
+  const int HsaTextSecIdx = getSectionIdx(".hsatext");
+  if (HsaTextSecIdx < 0) return StartMarkers;
+
+  for (auto &Symbol : Obj.symbols()) {
+    auto ElfSym = getELFSymbol(Symbol);
+    if (ElfSym->getType() != ELF::STT_AMDGPU_HSA_KERNEL ||
+        ElfSym->st_shndx != HsaTextSecIdx)
+      continue;
+    StartMarkers.push_back(ElfSym->getValue());
+    StartMarkers.push_back(getKernelStartOffset(ElfSym));
+  }
+  StartMarkers.push_back(getSectionContentsAsArray(HsaTextSecIdx).size());
+  array_pod_sort(StartMarkers.begin(), StartMarkers.end());
+  return StartMarkers;
+}
+
+/////////////////////////////////////
+// TODO: move this to MCContext
+
+class OwningMCContext : public MCContext {
+  std::unique_ptr<const MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> AsmInfo;
+
+  OwningMCContext(decltype(MRI) &&MRI_,
+                  decltype(AsmInfo) &&AsmInfo_,
+                  const MCObjectFileInfo *MOFI,
+                  const SourceMgr *Mgr,
+                  bool DoAutoReset)
+    : MCContext(AsmInfo_.get(), MRI_.get(), MOFI, Mgr, DoAutoReset)
+    , MRI(std::move(MRI_))
+    , AsmInfo(std::move(AsmInfo_)) {}
+
+  friend std::unique_ptr<MCContext> createMCContext(
+      const Target *TheTarget,
+      StringRef TripleName,
+      const MCObjectFileInfo *MOFI,
+      const SourceMgr *Mgr,
+      bool DoAutoReset);
+};
+
+std::unique_ptr<MCContext> createMCContext(const Target *TheTarget,
+                                           StringRef TripleName,
+                                           const MCObjectFileInfo *MOFI = nullptr,
+                                           const SourceMgr *Mgr = nullptr,
+                                           bool DoAutoReset = true) {
+  decltype(OwningMCContext::MRI)
+    MRI(TheTarget->createMCRegInfo(TripleName));
+  if (!MRI)
+    report_fatal_error("error: no register info for target " + TripleName);
+
+  decltype(OwningMCContext::AsmInfo)
+    AsmInfo(TheTarget->createMCAsmInfo(*MRI, TripleName));
+  if (!AsmInfo)
+    report_fatal_error("error: no assembly info for target " + TripleName);
+
+  return std::unique_ptr<MCContext>(new OwningMCContext(std::move(MRI),
+                                                        std::move(AsmInfo),
+                                                        MOFI, Mgr,
+                                                        DoAutoReset));
+}
+
+//
+///////////////////////////////////////////
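+
+// Prints the code object back as assembler directives: the
+// .hsa_code_object_version/.hsa_code_object_isa notes, then .hsatext and,
+// for every kernel symbol, .amdgpu_hsa_kernel, its amd_kernel_code_t fields,
+// and the disassembled instructions.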
+
+class HSACodeObjectDisassembler {
+  HSACodeObject Obj;
+  raw_ostream &OS;
+  raw_ostream &ES;
+
+  mutable std::unique_ptr<MCContext> Ctx;
+  mutable std::unique_ptr<MCSubtargetInfo> STI;
+  mutable std::unique_ptr<MCDisassembler> DisAsm;
+  mutable std::unique_ptr<MCInstrInfo> MII;
+  mutable std::unique_ptr<MCInstPrinter> IP;
+
+  const amdgpu_hsa_note_isa_s *findISANote() const;
+  void init(const amdgpu_hsa_note_isa_s *) const;
+  void printHeader() const;
+  void printInstructions(ArrayRef<uint32_t> Code, uint64_t Address) const;
+
+  void print(const amd_kernel_code_t *C) const;
+  void print(const amdgpu_hsa_note_code_object_version_t &) const;
+  void print(const amdgpu_hsa_note_isa_t &) const;
+
+  template <typename T>
+  void print(const ErrorOr<T> &S, StringRef ErrStr) const;
+
+public:
+  HSACodeObjectDisassembler(const ELF64LEObjectFile *Obj_,
+                            raw_ostream &OS_,
+                            raw_ostream &ES_)
+    : Obj(Obj_), OS(OS_), ES(ES_) {}
+
+  void print() const;
+};
+
+const amdgpu_hsa_note_isa_s *HSACodeObjectDisassembler::findISANote() const {
+  for (const auto &Note : Obj.notes()) {
+    if (!Note) break;
+    if (Note->type == NT_AMDGPU_HSA_ISA) {
+      const auto ISANote = Note->as<amdgpu_hsa_note_isa_t>();
+      return ISANote ? &ISANote.get() : nullptr;
+    }
+  }
+  return nullptr;
+}
+
+static StringRef getCPUName(const amdgpu_hsa_note_isa_s *ISA) {
+  if (ISA)
+    switch (ISA->major) {
+    case 7: return "kaveri";
+    case 8:
+      switch (ISA->minor) {
+      case 0: return ISA->stepping > 0 ? "carrizo" : "kaveri";
+      case 1: return "stoney";
+      }
+      break;
+    case 9: return "stoney";
+    }
+  return "";
+}
+
+void HSACodeObjectDisassembler::init(const amdgpu_hsa_note_isa_s *ISA) const {
+  const Target * const TheTarget = &TheGCNTarget;
+  const StringRef TripleName = "amdgcn-unknown-amdhsa";
+
+  Ctx = createMCContext(TheTarget, TripleName);
+
+  STI.reset(TheTarget->createMCSubtargetInfo(TripleName, getCPUName(ISA), ""));
+  DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+
+  MII.reset(TheTarget->createMCInstrInfo());
+  IP.reset(TheTarget->createMCInstPrinter(Triple(TripleName),
+             Ctx->getAsmInfo()->getAssemblerDialect(),
+             *Ctx->getAsmInfo(), *MII, *Ctx->getRegisterInfo()));
+}
+
+void HSACodeObjectDisassembler::print() const {
+  printHeader();
+  init(findISANote());
+  OS << "\n.hsatext\n\n";
+  for (auto Symbol : Obj.symbols()) {
+    auto KernelSym = Obj.toKernelSym(Symbol);
+    if (!KernelSym) continue;
+
+    OS << ".amdgpu_hsa_kernel " << *Symbol.getName() << "\n"
+       << *Symbol.getName() << ":\n";
+
+    const auto KC = Obj.getAMDKernelCodeT(KernelSym);
+    print(KC);
+
+    auto Code = trimTrailingZeroes(Obj.getKernelCode(KernelSym), 256/4);
+    if (!Code.empty()) {
+      printInstructions(Code, KernelSym->getValue() +
+                              KC->kernel_code_entry_byte_offset);
+      OS << '\n';
+    }
+  }
+}
+
+template <typename T>
+void HSACodeObjectDisassembler::print(const ErrorOr<T> &S,
+                                      StringRef ErrStr) const {
+  if (!S) {
+    ES << "failed to read " << ErrStr;
+    return;
+  }
+  print(*S);
+}
+
+void HSACodeObjectDisassembler::printHeader() const {
+  for (const auto &Note : Obj.notes()) {
+    if (!Note) break;
+    switch (Note->type) {
+    case NT_AMDGPU_HSA_CODE_OBJECT_VERSION:
+      print(Note->as<amdgpu_hsa_note_code_object_version_t>(),
+            "amdgpu_hsa_note_code_object_version_t");
+      break;
+    case NT_AMDGPU_HSA_ISA:
+      print(Note->as<amdgpu_hsa_note_isa_t>(),
+            "amdgpu_hsa_note_isa_s");
+      break;
+    default: break;
+    }
+  }
+}
+
+void HSACodeObjectDisassembler::print(
+    const amdgpu_hsa_note_code_object_version_t &V) const {
+  OS << ".hsa_code_object_version " << V.major_version
+     << "," << V.minor_version << '\n';
+}
+
+void HSACodeObjectDisassembler::print(const amdgpu_hsa_note_isa_t &ISA) const {
+  OS << ".hsa_code_object_isa "
+     << ISA.major
+     << "," << ISA.minor
+     << "," << ISA.stepping
+     << ",\"" << getVendorName(ISA)
+     << "\",\"" << getArchName(ISA) << "\"\n";
+}
+
+void HSACodeObjectDisassembler::print(const amd_kernel_code_t *C) const {
+  OS << " .amd_kernel_code_t\n";
+  dumpAmdKernelCode(C, OS, " ");
+  OS << " .end_amd_kernel_code_t\n";
+}
+
+void HSACodeObjectDisassembler::printInstructions(ArrayRef<uint32_t> Code,
+                                                  uint64_t Address) const {
+#ifdef NDEBUG
+  // llvm::DebugFlag is only available in asserts builds.
+  const bool DebugFlag = false;
+#endif
+  OS << "// Disassembly:\n";
+  SmallString<40> InstStr, CommentStr;
+  uint64_t Index = 0;
+  while (Index < Code.size()) {
+    InstStr.clear();
+    raw_svector_ostream IS(InstStr);
+    CommentStr.clear();
+    raw_svector_ostream CS(CommentStr);
+
+    MCInst Inst;
+    uint64_t EatenBytesNum = 0;
+    if (DisAsm->getInstruction(Inst, EatenBytesNum,
+                               makeArrayRef<uint8_t>(Code.slice(Index)),
+                               Address,
+                               DebugFlag ? dbgs() : nulls(),
+                               CS)) {
+      IP->printInst(&Inst, IS, "", DisAsm->getSubtargetInfo());
+    } else {
+      IS << "\t// unrecognized instruction ";
+      if (EatenBytesNum == 0)
+        EatenBytesNum = 4;
+    }
+    assert(0 == EatenBytesNum % 4);
+
+    OS << left_justify(IS.str(), 60)
+       << format("// %012llX:", static_cast<unsigned long long>(Address));
+    for (auto D : Code.slice(Index, EatenBytesNum / 4))
+      OS << format(" %08X", D);
+
+    if (!CS.str().empty())
+      OS << " // " << CS.str();
+
+    OS << '\n';
+    OS.flush();
+
+    Address += EatenBytesNum;
+    Index += EatenBytesNum / 4;
+  }
+}
+
+namespace llvm {
+
+// Called from llvm-objdump.
+void disassembleHSACodeObject(const ObjectFile *Obj,
+                              raw_ostream &OS,
+                              raw_ostream &ES) {
+  if (Obj->getArch() != Triple::amdgcn) return;
+  auto CObj = static_cast<const ELF64LEObjectFile *>(Obj);
+  HSACodeObjectDisassembler(CObj, OS, ES).print();
+}
+
+} // end namespace llvm
Index: test/Object/AMDGPU/hsacobj10.test
===================================================================
--- /dev/null
+++ test/Object/AMDGPU/hsacobj10.test
@@ -0,0 +1,86 @@
+RUN: llvm-objdump -disassemble-all %p/Inputs/asm-kernel.co | FileCheck %s
+
+CHECK: Inputs/asm-kernel.co: file format ELF64-amdgpu-hsacobj
+
+CHECK: .hsa_code_object_version 1,0
+CHECK: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
+
+CHECK: .hsatext
+
+CHECK: .amdgpu_hsa_kernel hello_world
+CHECK: hello_world:
+CHECK: .amd_kernel_code_t
+CHECK: kernel_code_version_major = 1
+CHECK: kernel_code_version_minor = 0
+CHECK: machine_kind = 1
+CHECK: machine_version_major = 8
+CHECK: machine_version_minor = 0
+CHECK: machine_version_stepping = 3
+CHECK: kernel_code_entry_byte_offset = 256
+CHECK: kernel_code_prefetch_byte_size = 0
+CHECK: max_scratch_backing_memory_byte_size = 0
+CHECK: compute_pgm_resource_registers = 520
+CHECK: workitem_private_segment_byte_size = 0
+CHECK: workgroup_group_segment_byte_size = 0
+CHECK: gds_segment_byte_size = 0
+CHECK: kernarg_segment_byte_size = 8
+CHECK: workgroup_fbarrier_count = 0
+CHECK: wavefront_sgpr_count = 2
+CHECK: workitem_vgpr_count = 3
+CHECK: reserved_vgpr_first = 0
+CHECK: reserved_vgpr_count = 0
+CHECK: reserved_sgpr_first = 0
+CHECK: reserved_sgpr_count = 0
+CHECK: debug_wavefront_private_segment_offset_sgpr = 0
+CHECK: debug_private_segment_buffer_sgpr = 0
+CHECK: kernarg_segment_alignment = 4
+CHECK: group_segment_alignment = 4
+CHECK: private_segment_alignment = 4
+CHECK: wavefront_size = 6
+CHECK: call_convention = 0
+CHECK: runtime_loader_kernel_symbol = 0
+CHECK: compute_pgm_rsrc1_vgprs = 8
+CHECK: compute_pgm_rsrc1_sgprs = 8
+CHECK: compute_pgm_rsrc1_priority = 0
+CHECK: compute_pgm_rsrc1_float_mode = 0
+CHECK: compute_pgm_rsrc1_priv = 0
+CHECK: compute_pgm_rsrc1_dx10_clamp = 0
+CHECK: compute_pgm_rsrc1_debug_mode = 0
+CHECK: compute_pgm_rsrc1_ieee_mode = 0
+CHECK: compute_pgm_rsrc2_scratch_en = 0
+CHECK: compute_pgm_rsrc2_user_sgpr = 2
+CHECK: compute_pgm_rsrc2_tgid_x_en = 0
+CHECK: compute_pgm_rsrc2_tgid_y_en = 0
+CHECK: compute_pgm_rsrc2_tgid_z_en = 0
+CHECK: compute_pgm_rsrc2_tg_size_en = 0
+CHECK: compute_pgm_rsrc2_tidig_comp_cnt = 0
+CHECK: compute_pgm_rsrc2_excp_en_msb = 0
+CHECK: compute_pgm_rsrc2_lds_size = 0
+CHECK: compute_pgm_rsrc2_excp_en = 0
+CHECK: enable_sgpr_private_segment_buffer = 0
+CHECK: enable_sgpr_dispatch_ptr = 0
+CHECK: enable_sgpr_queue_ptr = 0
+CHECK: enable_sgpr_kernarg_segment_ptr = 1
+CHECK: enable_sgpr_dispatch_id = 0
+CHECK: enable_sgpr_flat_scratch_init = 0
+CHECK: enable_sgpr_private_segment_size = 0
+CHECK: enable_sgpr_grid_workgroup_count_x = 0
+CHECK: enable_sgpr_grid_workgroup_count_y = 0
+CHECK: enable_sgpr_grid_workgroup_count_z = 0
+CHECK: enable_ordered_append_gds = 0
+CHECK: private_element_size = 0
+CHECK: is_ptr64 = 1
+CHECK: is_dynamic_callstack = 0
+CHECK: is_debug_enabled = 0
+CHECK: is_xnack_enabled = 0
+CHECK: .end_amd_kernel_code_t
+CHECK: // Disassembly:
+CHECK: s_load_dwordx2 s[0:1], 0, 0x0 // 000000000100: C0060000 00000000
+CHECK: v_mov_b32_e32 v0, 0x40490fd0 // 000000000108: 7E0002FF 40490FD0
+CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F
+CHECK: v_mov_b32_e32 v1, s0 // 000000000114: 7E020200
+CHECK: v_mov_b32_e32 v2, s1 // 000000000118: 7E040201
+CHECK: flat_store_dword v[1:2], v0 // 00000000011C: DC700000 00000001
+CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) // 000000000124: BF8C0000
+CHECK: s_endpgm // 000000000128: BF810000
+
Index: tools/llvm-objdump/llvm-objdump.cpp
===================================================================
--- tools/llvm-objdump/llvm-objdump.cpp
+++ tools/llvm-objdump/llvm-objdump.cpp
@@ -831,7 +831,19 @@
   return false;
 }
 
+namespace llvm {
+  void disassembleHSACodeObject(const ObjectFile *Obj,
+                                raw_ostream &OS,
+                                raw_ostream &ES);
+}
+
 static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+
+  if (Obj->getArch() == Triple::amdgcn) {
+    disassembleHSACodeObject(Obj, outs(), errs());
+    return;
+  }
+
   const Target *TheTarget = getTarget(Obj);
 
   // Package up features to be passed to target/subtarget