diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h --- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -15,6 +15,7 @@ #ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H #define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include #include @@ -75,6 +76,17 @@ virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) = 0; + + /// Get the MCSymbolizer's list of addresses that were referenced by + /// symbolizable operands but not resolved to a symbol. The caller (some + /// code that is disassembling a section or other chunk of code) would + /// typically create a synthetic label at each address and add them to its + /// list of symbols in the section, before creating a new MCSymbolizer with + /// the enhanced symbol list and retrying disassembling the section. + /// The returned array is unordered and may have duplicates. + /// The returned ArrayRef stops being valid on any call to or destruction of + /// the MCSymbolizer object. 
+ virtual ArrayRef getReferencedAddresses() const { return {}; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -183,6 +183,7 @@ class AMDGPUSymbolizer : public MCSymbolizer { private: void *DisInfo; + std::vector ReferencedAddresses; public: AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr &&RelInfo, @@ -197,6 +198,10 @@ void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override; + + ArrayRef getReferencedAddresses() const override { + return ReferencedAddresses; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1834,6 +1834,8 @@ Inst.addOperand(MCOperand::createExpr(Add)); return true; } + // Add to list of referenced addresses, so caller can synthesize a label. + ReferencedAddresses.push_back(static_cast(Value)); return false; } diff --git a/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml @@ -0,0 +1,89 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s + +## Expect to find the branch labels. 
+# CHECK: <break_cond_is_arg>: +# CHECK: s_branch L1 +# CHECK: <L0>: +# CHECK: s_cbranch_execz L2 +# CHECK: <L1>: +# CHECK: s_branch L0 +# CHECK: <L2>: + +# I created this YAML starting with this LLVM IR: +# +# define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { +# entry: +# br label %loop +# loop: +# %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ] +# %tmp23 = add nuw i32 %tmp23phi, 1 +# %tmp27 = icmp ult i32 %arg, %tmp23 +# br i1 %tmp27, label %then, label %endif +# then: ; preds = %bb +# call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0) +# br label %endif +# endif: ; preds = %bb28, %bb +# br i1 %breakcond, label %loop, label %loopexit +# loopexit: +# ret void +# } +# +# declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0 +# +# attributes #0 = { nounwind writeonly } +# +# I compiled it to a relocatable ELF: +# +# llc -march=amdgcn -mcpu=gfx1030 llvm/a.ll -filetype=obj -o a.elf +# +# then converted it to YAML: +# +# obj2yaml a.elf +# +# then manually removed the BB0_1 etc local symbols. 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x4 + Content: 00008CBF0000FDBB81020236810385BE800384BE8102847D6AC10689040082BF7E077E88058105817E047E8A080088BF0500887D7E060787070404886A3C87BEF7FF88BF000070E000000104F4FF82BF7E047E880000FDBB1E2080BE + - Name: .AMDGPU.config + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 48B80000000000004CB800000000000060B800000000000004000000000000000800000000000000 + - Name: .note.GNU-stack + Type: SHT_PROGBITS + AddressAlign: 0x1 + - Name: .note + Type: SHT_NOTE + AddressAlign: 0x4 + Notes: + - Name: AMD + Desc: 616D6467636E2D756E6B6E6F776E2D6C696E75782D676E752D67667831303330 + Type: NT_FREEBSD_PROCSTAT_GROUPS + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .shstrtab + - Name: .text + - Name: .AMDGPU.config + - Name: .note.GNU-stack + - Name: .note + - Name: .symtab +Symbols: + - Name: break_cond_is_arg + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Size: 0x5C +... diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1499,6 +1499,62 @@ } } +// Create an MCSymbolizer for the target and add it to the MCDisassembler. +// This is currently only used on AMDGPU, and assumes the format of the +// void * argument passed to AMDGPU's createMCSymbolizer. 
+static void addSymbolizer(
+    MCContext &Ctx, const Target *Target, StringRef TripleName,
+    MCDisassembler *DisAsm, uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
+    SectionSymbolsTy &Symbols,
+    std::vector<std::unique_ptr<std::string>> &SynthesizedLabelNames) {
+  // Synthesized labels are added to Symbols; SynthesizedLabelNames owns their
+  // names. SymbolInfoTy presumably refers to (rather than copies) the name, so
+  // the caller should keep SynthesizedLabelNames alive while Symbols is used.
+  std::unique_ptr<MCRelocationInfo> RelInfo(
+      Target->createMCRelocationInfo(TripleName, Ctx));
+  if (!RelInfo)
+    return;
+  std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  MCSymbolizer *SymbolizerPtr = Symbolizer.get();
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+
+  if (!SymbolizeOperands)
+    return;
+
+  // Synthesize labels referenced by branch instructions by
+  // disassembling, discarding the output, and collecting the referenced
+  // addresses from the symbolizer.
+  for (size_t Index = 0; Index < Bytes.size();) {
+    MCInst Inst;
+    uint64_t Size = 0;
+    DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
+                           nulls());
+    // Skip one byte when nothing decodes, so the loop always makes progress.
+    Index += Size ? Size : 1;
+  }
+  // Copy the addresses: the ArrayRef dies with the symbolizer replaced below.
+  // Then sort and remove duplicates.
+  ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
+  std::vector<uint64_t> LabelAddrs(LabelAddrsRef.begin(), LabelAddrsRef.end());
+  llvm::sort(LabelAddrs);
+  LabelAddrs.erase(std::unique(LabelAddrs.begin(), LabelAddrs.end()),
+                   LabelAddrs.end());
+  // Add the labels, named L0, L1, ... in increasing address order.
+  for (size_t LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
+    SynthesizedLabelNames.push_back(
+        std::make_unique<std::string>((Twine("L") + Twine(LabelNum)).str()));
+    Symbols.push_back(SymbolInfoTy(
+        LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
+  }
+  llvm::stable_sort(Symbols);
+  // Recreate the symbolizer with the new symbols list.
+  RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
+  Symbolizer.reset(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+}
+
 static StringRef getSegmentName(const MachOObjectFile *MachO,
                                 const SectionRef &Section) {
   if (MachO) {
@@ -1661,16 +1717,14 @@

     llvm::sort(MappingSymbols);

+    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
+        unwrapOrError(Section.getContents(), Obj->getFileName()));
+
+    std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames;
     if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
       // AMDGPU disassembler uses symbolizer for printing labels
-      std::unique_ptr<MCRelocationInfo> RelInfo(
-        TheTarget->createMCRelocationInfo(TripleName, Ctx));
-      if (RelInfo) {
-        std::unique_ptr<MCSymbolizer> Symbolizer(
-          TheTarget->createMCSymbolizer(
-            TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
-        DisAsm->setSymbolizer(std::move(Symbolizer));
-      }
+      addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
+                    Symbols, SynthesizedLabelNames);
     }

     StringRef SegmentName = getSegmentName(MachO, Section);
@@ -1686,9 +1740,6 @@
     SmallString<40> Comments;
     raw_svector_ostream CommentStream(Comments);

-    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
-        unwrapOrError(Section.getContents(), Obj->getFileName()));
-
     uint64_t VMAAdjustment = 0;
     if (shouldAdjustVA(Section))
       VMAAdjustment = AdjustVMA;