diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -226,6 +226,8 @@ When printing a PC-relative global symbol reference, print it as an offset from the leading symbol. + When a bb-address-map section is present (i.e., the object file is built with ``-fbasic-block-sections=labels``), labels are retrieved from that section instead. + Only works with PowerPC objects or X86 linked images. Example: diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -102,6 +102,12 @@ /// Returns a vector containing a symbol version for each dynamic symbol. /// Returns an empty vector if version sections do not exist. Expected> readDynsymVersions() const; + + /// Returns a vector of all BB address maps in the object file. When + /// `TextSectionIndex` is specified, only returns the BB address maps + /// corresponding to the section with that index. + Expected> + readBBAddrMap(Optional TextSectionIndex = None) const; }; class ELFSectionRef : public SectionRef { diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -812,8 +812,20 @@ : Offset(Offset), Size(Size), HasReturn(Metadata & 1), HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)), CanFallThrough(Metadata & (1 << 3)){}; + + bool operator==(const BBEntry &Other) const { + return Offset == Other.Offset && Size == Other.Size && + HasReturn == Other.HasReturn && HasTailCall == Other.HasTailCall && + IsEHPad == Other.IsEHPad && CanFallThrough == Other.CanFallThrough; + } }; std::vector BBEntries; // Basic block entries for this function. + + // Equality operator for unit testing.
+ bool operator==(const BBAddrMap &Other) const { + // Vector equality compares the sizes before the elements. + return Addr == Other.Addr && BBEntries == Other.BBEntries; + } }; } // end namespace object. diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -670,6 +670,37 @@ return Result; } +/// Decodes every SHT_LLVM_BB_ADDR_MAP section of \p EF into one vector. When +/// \p TextSectionIndex is set, only sections linked to that index are decoded. +template +static Expected> +readBBAddrMapImpl(const ELFFile &EF, + Optional TextSectionIndex) { + using Elf_Shdr = typename ELFT::Shdr; + std::vector BBAddrMaps; + const auto &Sections = cantFail(EF.sections()); + for (const Elf_Shdr &Sec : Sections) { + if (Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP) + continue; + if (TextSectionIndex) { + Expected TextSecOrErr = EF.getSection(Sec.sh_link); + if (!TextSecOrErr) + return createError("unable to get the linked-to section for " + + describe(EF, Sec) + ": " + + toString(TextSecOrErr.takeError())); + if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr)) + continue; + } + Expected> BBAddrMapOrErr = EF.decodeBBAddrMap(Sec); + if (!BBAddrMapOrErr) + return createError("unable to read " + describe(EF, Sec) + ": " + + toString(BBAddrMapOrErr.takeError())); + std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(), + std::back_inserter(BBAddrMaps)); + } + return BBAddrMaps; +} + template static Expected> readDynsymVersionsImpl(const ELFFile &EF, @@ -738,3 +769,17 @@ return readDynsymVersionsImpl(cast(this)->getELFFile(), Symbols); } + +Expected> +ELFObjectFileBase::readBBAddrMap(Optional TextSectionIndex) const { + if (const auto *Obj = dyn_cast(this)) + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + if (const auto *Obj = dyn_cast(this)) + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + if (const auto *Obj = dyn_cast(this)) + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + if (const auto *Obj = cast(this)) + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + else +
llvm_unreachable("Unsupported binary format"); +} diff --git a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml @@ -0,0 +1,172 @@ +## Test that in the presence of SHT_LLVM_BB_ADDR_MAP sections, +## --symbolize-operands can display labels. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-objdump %t1 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefix=INTEL +# RUN: llvm-objdump %t1 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefix=ATT + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-objdump %t2 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefixes=INTEL,INTEL-MULTISECTION +# RUN: llvm-objdump %t2 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefixes=ATT,ATT-MULTISECTION + +## Expect to find the branch and basic block labels and global variable name.
+# ATT: : +# ATT-NEXT: : +# ATT-NEXT: pushq %rax +# ATT-NEXT: : +# ATT-NEXT: cmpl , %eax +# ATT-NEXT: nop +# ATT-NEXT: : +# ATT-NEXT: jge +# ATT-NEXT: jmp +# ATT-NEXT: : +# ATT-NEXT: retq +# ATT-MULTISECTION: : +# ATT-MULTISECTION-NEXT: : +# ATT-MULTISECTION-NEXT: pushq %rax +# ATT-MULTISECTION-NEXT: movl %edx, %eax +# ATT-MULTISECTION-NEXT: je +# ATT-MULTISECTION-NEXT: : +# ATT-MULTISECTION-NEXT: xorl %esi, %esi +# ATT-MULTISECTION-NEXT: : +# ATT-MULTISECTION-NEXT: callq +# ATT-MULTISECTION-NEXT: retq + +# INTEL: : +# INTEL-NEXT: : +# INTEL-NEXT: push rax +# INTEL-NEXT: : +# INTEL-NEXT: cmp eax, dword ptr +# INTEL-NEXT: nop +# INTEL-NEXT: : +# INTEL-NEXT: jge +# INTEL-NEXT: jmp +# INTEL-NEXT: : +# INTEL-NEXT: ret +# INTEL-MULTISECTION: : +# INTEL-MULTISECTION-NEXT: : +# INTEL-MULTISECTION-NEXT: push rax +# INTEL-MULTISECTION-NEXT: mov eax, edx +# INTEL-MULTISECTION-NEXT: je +# INTEL-MULTISECTION-NEXT: : +# INTEL-MULTISECTION-NEXT: xor esi, esi +# INTEL-MULTISECTION-NEXT: : +# INTEL-MULTISECTION-NEXT: call +# INTEL-MULTISECTION-NEXT: ret + +## This object file contains a text section, a SHT_LLVM_BB_ADDR_MAP section +## linked to it, and a data section. 
+--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Address: 0x4000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '503b0505100000907d02ebf5c3' + - Name: .data + Type: SHT_PROGBITS + Flags: [SHF_ALLOC, SHF_WRITE] + Address: 0x5000 + - Name: bb_addr_map_1 + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text + Entries: + - Address: 0x4000 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - AddressOffset: 0x1 + Size: 0x6 + Metadata: 0x0 + - AddressOffset: 0x8 + Size: 0x3 + Metadata: 0x0 + - AddressOffset: 0xc + Size: 0x1 + Metadata: 0x2 +Symbols: + - Name: foo + Section: .text + Value: 0x4000 + - Name: symbol + Section: .data + Value: 0x500c + +## This object file contains a separate text section and SHT_LLVM_BB_ADDR_MAP +## section for each of the two functions foo and bar. foo's section contents +## are identical to the ones above. +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text.foo + Type: SHT_PROGBITS + Address: 0x4000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '503b0505200000907d02ebf5c3' + - Name: .text.bar + Type: SHT_PROGBITS + Address: 0x5000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '5089d0740231f6e8f4ffffffc3' + - Name: .data + Type: SHT_PROGBITS + Flags: [SHF_ALLOC, SHF_WRITE] + Address: 0x6000 + - Name: bb_addr_map.foo + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.foo + Entries: + - Address: 0x4000 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - AddressOffset: 0x1 + Size: 0x6 + Metadata: 0x0 + - AddressOffset: 0x8 + Size: 0x3 + Metadata: 0x0 + - AddressOffset: 0xc + Size: 0x1 + Metadata: 0x2 + - Name: bb_addr_map.bar + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.bar + Entries: + - Address: 0x5000 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - AddressOffset: 0x5 + Size: 0x2 + Metadata: 0x0 + - AddressOffset: 0x7 + Size: 0x6 + 
Metadata: 0x0 + +Symbols: + - Name: foo + Section: .text.foo + Value: 0x4000 + - Name: bar + Section: .text.bar + Value: 0x5000 + - Name: symbol + Section: .data + Value: 0x600c diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -53,6 +53,7 @@ #include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" #include "llvm/Object/FaultMapParser.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" @@ -982,11 +983,29 @@ } static void -collectLocalBranchTargets(ArrayRef Bytes, const MCInstrAnalysis *MIA, - MCDisassembler *DisAsm, MCInstPrinter *IP, - const MCSubtargetInfo *STI, uint64_t SectionAddr, - uint64_t Start, uint64_t End, - std::unordered_map &Labels) { +collectBBAddrMapLabels(const std::unordered_map &AddrToBBAddrMap, + uint64_t SectionAddr, uint64_t Start, uint64_t End, + std::unordered_map> &Labels) { + if (AddrToBBAddrMap.empty()) + return; // Nothing to label: the map is empty. + Labels.clear(); + uint64_t StartAddress = SectionAddr + Start; + uint64_t EndAddress = SectionAddr + End; + auto Iter = AddrToBBAddrMap.find(StartAddress); + if (Iter == AddrToBBAddrMap.end()) + return; // No map entry is keyed by the start address. + for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) { + uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr; + if (BBAddress >= EndAddress) + continue; // Skip blocks at or beyond the end address. + Labels[BBAddress].push_back(("BB" + Twine(I)).str()); + } +} + +static void collectLocalBranchTargets( + ArrayRef Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm, + MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr, + uint64_t Start, uint64_t End, std::unordered_map &Labels) { // So far only supports PowerPC and X86.
if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86()) return; @@ -1015,7 +1034,6 @@ !(STI->getTargetTriple().isPPC() && Target == Index)) Labels[Target] = ("L" + Twine(LabelCount++)).str(); } - Index += Size; } } @@ -1250,6 +1268,20 @@ if (!SectSize) continue; + std::unordered_map AddrToBBAddrMap; + if (SymbolizeOperands) { + if (auto *Elf = dyn_cast(Obj)) { + // Read the BB-address-map corresponding to this section, if present. + auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex()); + if (!SectionBBAddrMapsOrErr) + reportWarning(toString(SectionBBAddrMapsOrErr.takeError()), + Obj->getFileName()); + else // Only dereference the Expected when it holds a value. + for (auto &Map : *SectionBBAddrMapsOrErr) + AddrToBBAddrMap.emplace(Map.Addr, std::move(Map)); + } + } + // Get the list of all the symbols in this section. SectionSymbolsTy &Symbols = AllSymbols[Section]; std::vector MappingSymbols; @@ -1413,9 +1445,13 @@ formatted_raw_ostream FOS(outs()); std::unordered_map AllLabels; - if (SymbolizeOperands) + std::unordered_map> BBAddrMapLabels; + if (SymbolizeOperands) { collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, SectionAddr, Index, End, AllLabels); + collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End, + BBAddrMapLabels); + } while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the @@ -1459,9 +1495,15 @@ } // Print local label if there's any.
- auto Iter = AllLabels.find(SectionAddr + Index); - if (Iter != AllLabels.end()) - FOS << "<" << Iter->second << ">:\n"; + auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index); + if (Iter1 != BBAddrMapLabels.end()) { // BB labels take precedence. + for (StringRef Label : Iter1->second) + FOS << "<" << Label << ">:\n"; + } else { // Fall back to the branch-target label, if any. + auto Iter2 = AllLabels.find(SectionAddr + Index); + if (Iter2 != AllLabels.end()) + FOS << "<" << Iter2->second << ">:\n"; + } // Disassemble a real instruction or a data when disassemble all is // provided @@ -1556,6 +1598,7 @@ } // Print the labels corresponding to the target if there's any. + bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target); bool LabelAvailable = AllLabels.count(Target); if (TargetSym != nullptr) { uint64_t TargetAddress = TargetSym->Addr; @@ -1569,14 +1612,18 @@ // Always Print the binary symbol precisely corresponding to // the target address. *TargetOS << TargetName; - } else if (!LabelAvailable) { + } else if (BBAddrMapLabelAvailable) { // Prefer the BB label. + *TargetOS << BBAddrMapLabels[Target].front(); + } else if (LabelAvailable) { + *TargetOS << AllLabels[Target]; + } else { // Always Print the binary symbol plus an offset if there's no // local label corresponding to the target address. *TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp); - } else { - *TargetOS << AllLabels[Target]; } *TargetOS << ">"; + } else if (BBAddrMapLabelAvailable) { // Prefer the BB label. + *TargetOS << " <" << BBAddrMapLabels[Target].front() << ">"; } else if (LabelAvailable) { *TargetOS << " <" << AllLabels[Target] << ">"; } diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -497,7 +497,7 @@ } // Tests for error paths of the ELFFile::decodeBBAddrMap API.
-TEST(ELFObjectFileTest, InvalidBBAddrMap) { +TEST(ELFObjectFileTest, InvalidDecodeBBAddrMap) { StringRef CommonYamlString(R"( --- !ELF FileHeader: @@ -604,6 +604,123 @@ "ULEB128 value at offset 0x8 exceeds UINT32_MAX (0x100000000)"); } +// Test for the ELFObjectFile::readBBAddrMap API. +TEST(ELFObjectFileTest, ReadBBAddrMap) { + StringRef CommonYamlString(R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map_1 + Type: SHT_LLVM_BB_ADDR_MAP + Link: 1 + Entries: + - Address: 0x11111 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + - Name: .llvm_bb_addr_map_2 + Type: SHT_LLVM_BB_ADDR_MAP + Link: 1 + Entries: + - Address: 0x22222 + BBEntries: + - AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + # Link: 0 (by default) + Entries: + - Address: 0x33333 + BBEntries: + - AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x6 +)"); + + BBAddrMap E1 = {0x11111, {{0x0, 0x1, 0x2}}}; + BBAddrMap E2 = {0x22222, {{0x0, 0x2, 0x4}}}; + BBAddrMap E3 = {0x33333, {{0x0, 0x3, 0x6}}}; + + std::vector Section0BBAddrMaps = {E3}; + std::vector Section1BBAddrMaps = {E1, E2}; + std::vector AllBBAddrMaps = {E1, E2, E3}; + + auto DoCheckSucceeds = [&](StringRef YamlString, + Optional TextSectionIndex, + std::vector ExpectedResult) { + SmallString<0> Storage; + Expected> ElfOrErr = + toBinary(Storage, YamlString); + ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded()); + + Expected BBAddrMapSecOrErr = + ElfOrErr->getELFFile().getSection(1); + ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); + auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex); + // Stop on failure: a failed Expected must not be dereferenced below. + ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded()); + EXPECT_EQ(*BBAddrMaps, ExpectedResult); + }; + + auto DoCheckFails = [&](StringRef YamlString, + Optional TextSectionIndex, + const char *ErrMsg) { + SmallString<0> Storage; + Expected> ElfOrErr = + toBinary(Storage, YamlString); + ASSERT_THAT_EXPECTED(ElfOrErr, 
Succeeded()); + + Expected BBAddrMapSecOrErr = + ElfOrErr->getELFFile().getSection(1); + ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); + EXPECT_THAT_ERROR(ElfOrErr->readBBAddrMap(TextSectionIndex).takeError(), + FailedWithMessage(ErrMsg)); + }; + + // Check that we can retrieve the data in the normal case. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/None, AllBBAddrMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section1BBAddrMaps); + // Check that when no bb-address-map section is found for a text section, + // we return an empty result. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, {}); + + // Check that we detect when a bb-addr-map section is linked to an invalid + // (not present) section. + SmallString<128> InvalidLinkedYamlString(CommonYamlString); + InvalidLinkedYamlString += R"( + Link: 10 +)"; + + DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/1, + "unable to get the linked-to section for SHT_LLVM_BB_ADDR_MAP " + "section with index 3: invalid section index: 10"); + // Linked sections are not checked when we don't target a specific text + // section. + DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/None, + AllBBAddrMaps); + + // Check that we can detect when bb-address-map decoding fails. + SmallString<128> TruncatedYamlString(CommonYamlString); + TruncatedYamlString += R"( + ShSize: 0x8 +)"; + + DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/None, + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 3: " + "unable to decode LEB128 at offset 0x00000008: malformed " + "uleb128, extends past end"); + // Check that we can read the other section's bb-address-maps which are + // valid. 
+ DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/1, + Section1BBAddrMaps); +} + // Test for ObjectFile::getRelocatedSection: check that it returns a relocated // section for executable and relocatable files. TEST(ELFObjectFileTest, ExecutableWithRelocs) {