Index: llvm/trunk/test/Object/AMDGPU/objdump.s =================================================================== --- llvm/trunk/test/Object/AMDGPU/objdump.s +++ llvm/trunk/test/Object/AMDGPU/objdump.s @@ -11,11 +11,14 @@ s_load_dwordx2 s[0:1], s[4:5], 0x8 s_waitcnt lgkmcnt(0) s_add_u32 s0, s7, s0 +BB0: v_add_i32_e32 v1, vcc, s0, v1 +BB1: s_movk_i32 s0, 0x483 v_cmp_ge_i32_e32 vcc, s0, v0 s_and_saveexec_b64 s[0:1], vcc v_lshlrev_b32_e32 v4, 2, v0 +BB3: s_cbranch_execz 21 s_mov_b64 s[2:3], exec s_mov_b64 s[10:11], exec @@ -30,6 +33,7 @@ s_and_saveexec_b64 s[0:1], vcc s_cbranch_execz 85 s_load_dwordx4 s[8:11], s[4:5], 0x40 +BB5: v_ashrrev_i32_e32 v77, 31, v76 v_lshlrev_b64 v[10:11], 2, v[76:77] s_waitcnt lgkmcnt(0) @@ -48,11 +52,14 @@ // CHECK: s_load_dwordx2 s[0:1], s[4:5], 0x8 // 000000000108: C0060002 00000008 // CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F // CHECK: s_add_u32 s0, s7, s0 // 000000000114: 80000007 +// CHECK: BB0: // CHECK: v_add_i32_e32 v1, vcc, s0, v1 // 000000000118: 32020200 +// CHECK: BB1: // CHECK: s_movk_i32 s0, 0x483 // 00000000011C: B0000483 // CHECK: v_cmp_ge_i32_e32 vcc, s0, v0 // 000000000120: 7D8C0000 // CHECK: s_and_saveexec_b64 s[0:1], vcc // 000000000124: BE80206A // CHECK: v_lshlrev_b32_e32 v4, 2, v0 // 000000000128: 24080082 +// CHECK: BB3: // CHECK: s_cbranch_execz 21 // 00000000012C: BF880015 // CHECK: s_mov_b64 s[2:3], exec // 000000000130: BE82017E // CHECK: s_mov_b64 s[10:11], exec // 000000000134: BE8A017E @@ -63,6 +70,7 @@ // CHECK: s_and_saveexec_b64 s[0:1], vcc // 000000000240: BE80206A // CHECK: s_cbranch_execz 85 // 000000000244: BF880055 // CHECK: s_load_dwordx4 s[8:11], s[4:5], 0x40 // 000000000248: C00A0202 00000040 +// CHECK: BB5: // CHECK: v_ashrrev_i32_e32 v77, 31, v76 // 000000000250: 229A989F // CHECK: v_lshlrev_b64 v[10:11], 2, v[76:77] // 000000000254: D28F000A 00029882 // CHECK: s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F Index: llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp 
=================================================================== --- llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp +++ llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp @@ -1032,6 +1032,19 @@ return false; } +static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) { + assert(Obj->isELF()); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj)) + return Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj)) + return Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj)) + return Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj)) + return Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + llvm_unreachable("Unsupported binary format"); +} + static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { const Target *TheTarget = getTarget(Obj); @@ -1096,7 +1109,7 @@ // Create a mapping from virtual address to symbol name. This is used to // pretty print the symbols while disassembling. - typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy; + typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy; std::map<SectionRef, SectionSymbolsTy> AllSymbols; for (const SymbolRef &Symbol : Obj->symbols()) { Expected<uint64_t> AddressOrErr = Symbol.getAddress(); @@ -1113,8 +1126,15 @@ section_iterator SecI = *SectionOrErr; if (SecI == Obj->section_end()) continue; + + // For AMDGPU we need to track symbol types + uint8_t SymbolType = ELF::STT_NOTYPE; + if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { + SymbolType = getElfSymbolType(Obj, Symbol); + } + + AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType); - AllSymbols[*SecI].emplace_back(Address, *Name); } // Create a mapping from virtual address to section. 
@@ -1146,7 +1166,7 @@ Sec = SectionAddresses.end(); if (Sec != SectionAddresses.end()) - AllSymbols[Sec->second].emplace_back(VA, Name); + AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE); } } @@ -1170,8 +1190,8 @@ std::vector<uint64_t> TextMappingSymsAddr; if (Obj->isELF() && Obj->getArch() == Triple::aarch64) { for (const auto &Symb : Symbols) { - uint64_t Address = Symb.first; - StringRef Name = Symb.second; + uint64_t Address = std::get<0>(Symb); + StringRef Name = std::get<1>(Symb); if (Name.startswith("$d")) DataMappingSymsAddr.push_back(Address - SectionAddr); if (Name.startswith("$x")) @@ -1208,8 +1228,9 @@ outs() << name << ':'; // If the section has no symbol at the start, just insert a dummy one. - if (Symbols.empty() || Symbols[0].first != 0) - Symbols.insert(Symbols.begin(), std::make_pair(SectionAddr, name)); + if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) { + Symbols.insert(Symbols.begin(), std::make_tuple(SectionAddr, name, ELF::STT_NOTYPE)); + } SmallString<40> Comments; raw_svector_ostream CommentStream(Comments); @@ -1226,12 +1247,11 @@ std::vector<RelocationRef>::const_iterator rel_end = Rels.end(); // Disassemble symbol by symbol. for (unsigned si = 0, se = Symbols.size(); si != se; ++si) { - - uint64_t Start = Symbols[si].first - SectionAddr; + uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr; // The end is either the section end or the beginning of the next // symbol. uint64_t End = - (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr; + (si == se - 1) ? SectSize : std::get<0>(Symbols[si + 1]) - SectionAddr; // Don't try to disassemble beyond the end of section contents. 
if (End > SectSize) End = SectSize; @@ -1242,16 +1262,23 @@ if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { // make size 4 bytes folded End = Start + ((End - Start) & ~0x3ull); - Start += 256; // add sizeof(amd_kernel_code_t) - // cut trailing zeroes - up to 256 bytes (align) - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0) - End -= 4; + if (std::get<2>(Symbols[si]) == ELF::STT_AMDGPU_HSA_KERNEL) { + // skip amd_kernel_code_t at the beginning of kernel symbol (256 bytes) + Start += 256; + } + if (si == se - 1 || + std::get<2>(Symbols[si + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) { + // cut trailing zeroes at the end of kernel + // cut up to 256 bytes + const uint64_t EndAlign = 256; + const auto Limit = End - (std::min)(EndAlign, End - Start); + while (End > Limit && + *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0) + End -= 4; + } } - outs() << '\n' << Symbols[si].second << ":\n"; + outs() << '\n' << std::get<1>(Symbols[si]) << ":\n"; #ifndef NDEBUG raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); @@ -1348,8 +1375,8 @@ auto TargetSym = std::upper_bound( TargetSectionSymbols->begin(), TargetSectionSymbols->end(), Target, [](uint64_t LHS, - const std::pair<uint64_t, StringRef> &RHS) { - return LHS < RHS.first; + const std::tuple<uint64_t, StringRef, uint8_t> &RHS) { + return LHS < std::get<0>(RHS); }); if (TargetSym != TargetSectionSymbols->begin()) { --TargetSym;