Index: test/tools/llvm-objdump/ARM/disassemble-arm-thumb-elf-mix.test =================================================================== --- /dev/null +++ test/tools/llvm-objdump/ARM/disassemble-arm-thumb-elf-mix.test @@ -0,0 +1,37 @@ +@RUN: llvm-objdump -d %p/Inputs/arm-thumb-mix.elf-arm | FileCheck %s + +@CHECK: foo: +@CHECK-NEXT: 80b4: 80 b5 push {r7, lr} +@CHECK-NEXT: 80b6: 00 af add r7, sp, #0 +@CHECK-NEXT: 80b8: 00 f0 22 f8 bl #68 +@CHECK-NEXT: 80bc: 40 1c adds r0, r0, #1 +@CHECK-NEXT: 80be: 80 bc pop {r7} +@CHECK-NEXT: 80c0: 02 bc pop {r1} +@CHECK-NEXT: 80c2: 8e 46 mov lr, r1 +@CHECK-NEXT: 80c4: 70 47 bx lr +@CHECK-NEXT: 80c6: 00 00 movs r0, r0 + +@CHECK: main: +@CHECK-NEXT: 80c8: 00 48 2d e9 push {r11, lr} +@CHECK-NEXT: 80cc: 0d b0 a0 e1 mov r11, sp +@CHECK-NEXT: 80d0: 08 d0 4d e2 sub sp, sp, #8 +@CHECK-NEXT: 80d4: 00 00 a0 e3 mov r0, #0 +@CHECK-NEXT: 80d8: 04 00 8d e5 str r0, [sp, #4] +@CHECK-NEXT: 80dc: 14 01 00 e3 movw r0, #276 +@CHECK-NEXT: 80e0: 01 00 40 e3 movt r0, #1 +@CHECK-NEXT: 80e4: 00 00 90 e5 ldr r0, [r0] +@CHECK-NEXT: 80e8: 00 00 8d e5 str r0, [sp] +@CHECK-NEXT: 80ec: f0 ff ff fa blx #-64 +@CHECK-NEXT: 80f0: 00 e0 9d e5 ldr lr, [sp] +@CHECK-NEXT: 80f4: 00 00 8e e0 add r0, lr, r0 +@CHECK-NEXT: 80f8: 0b d0 a0 e1 mov sp, r11 +@CHECK-NEXT: 80fc: 00 88 bd e8 pop {r11, pc} + +@CHECK: bar: +@CHECK-NEXT: 8100: 01 48 ldr r0, [pc, #4] +@CHECK-NEXT: 8102: 00 68 ldr r0, [r0] +@CHECK-NEXT: 8104: 70 47 bx lr +@CHECK-NEXT: 8106: c0 46 mov r8, r8 + +@CHECK: $d.1: +@CHECK-NEXT: 8108: 18 01 01 00 .word 0x00010118 Index: tools/llvm-objdump/llvm-objdump.cpp =================================================================== --- tools/llvm-objdump/llvm-objdump.cpp +++ tools/llvm-objdump/llvm-objdump.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/FaultMaps.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" 
@@ -41,6 +42,7 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -49,6 +51,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" @@ -466,16 +469,19 @@ static bool isArmElf(const ObjectFile *Obj) { return (Obj->isELF() && - (Obj->getArch() == Triple::aarch64 || - Obj->getArch() == Triple::aarch64_be || - Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb || + (Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb || Obj->getArch() == Triple::thumb || Obj->getArch() == Triple::thumbeb)); } +static bool isAarch64Elf(const ObjectFile *Obj) { + return (Obj->isELF() && (Obj->getArch() == Triple::aarch64 || + Obj->getArch() == Triple::aarch64_be)); +} + class PrettyPrinter { public: - virtual ~PrettyPrinter(){} + virtual ~PrettyPrinter() {} virtual void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, uint64_t Address, raw_ostream &OS, StringRef Annot, @@ -585,14 +591,14 @@ }; AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst; -PrettyPrinter &selectPrettyPrinter(Triple const &Triple) { - switch(Triple.getArch()) { +PrettyPrinter *selectPrettyPrinter(Triple const &Triple) { + switch (Triple.getArch()) { default: - return PrettyPrinterInst; + return &PrettyPrinterInst; case Triple::hexagon: - return HexagonPrettyPrinterInst; + return &HexagonPrettyPrinterInst; case Triple::amdgcn: - return AMDGCNPrettyPrinterInst; + return &AMDGCNPrettyPrinterInst; } } } @@ -1065,12 +1071,166 @@ return Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); llvm_unreachable("Unsupported binary format"); } +template +static std::string 
getArmCpuDetails(const ELFFile<ELFT> *Obj, bool &ThumbOnly) {
+  // Scan every SHT_ARM_ATTRIBUTES section of Obj for the "aeabi" vendor
+  // subsection and read its file-scope build attributes.
+  //
+  // Obj        ELF file whose section headers and contents are inspected.
+  // ThumbOnly  Out-parameter. Starts as true and is cleared only when a
+  //            Tag_ARM_ISA_use attribute with a non-zero value is seen
+  //            (value 0 means ARM code is not permitted).
+  //
+  // Returns the lower-cased Tag_CPU_name string, or "" when the section
+  // contents cannot be read, the format version byte is wrong, or no CPU
+  // name attribute is present.
+  //
+  // All lengths come from the file, so each one is validated against the
+  // enclosing region before it is used; a malformed length stops parsing of
+  // that region instead of reading out of bounds or looping forever.
+  ThumbOnly = true;
+  bool CheckedThumb = false;
+  bool CheckedArm = false;
+  StringRef CpuStr = StringRef();
+  using Elf_Word = typename ELFFile<ELFT>::Elf_Word;
+  const StringRef Vendor("aeabi");
+  for (auto &Shdr : Obj->sections()) {
+    if (Shdr.sh_type != ELF::SHT_ARM_ATTRIBUTES)
+      continue;
+    auto Contents = Obj->getSectionContents(&Shdr);
+    if (!Contents)
+      return "";
+    ArrayRef<uint8_t> AttributeSection = *Contents;
+    // Check for valid attribute format. An empty section has no version byte.
+    if (AttributeSection.empty() ||
+        AttributeSection[0] != ARMBuildAttrs::Format_Version)
+      return "";
+    const uint8_t *Base = AttributeSection.data();
+    const size_t SectionSize = AttributeSection.size();
+    size_t Offset = 1;
+    // Find the aeabi vendor subsection.
+    while (Offset + sizeof(Elf_Word) <= SectionSize) {
+      // Always derive the pointer from the section start; Offset is already
+      // cumulative, so adding it to a previously advanced pointer would skip
+      // past the next subsection.
+      const uint8_t *Data = Base + Offset;
+
+      // First 4 bytes are the object-endian length of the subsection,
+      // including the length field itself.
+      uint32_t Length = *reinterpret_cast<const Elf_Word *>(Data);
+      if (Length <= sizeof(Elf_Word) || Length > SectionSize - Offset)
+        break;
+      // Advance now so that every path below makes forward progress.
+      Offset += Length;
+
+      size_t SubOffset = sizeof(Elf_Word);
+      // The vendor name is NUL terminated; compare it without reading past
+      // the end of the subsection.
+      if (Length - SubOffset <= Vendor.size())
+        continue;
+      StringRef Name(reinterpret_cast<const char *>(Data + SubOffset),
+                     Vendor.size());
+      if (Name != Vendor || Data[SubOffset + Vendor.size()] != 0)
+        continue;
+
+      SubOffset += Vendor.size() + 1;
+      // Walk the sub-subsections: a one byte tag followed by a 4 byte size
+      // that covers the tag, the size field and the payload.
+      while (SubOffset + 1 + sizeof(Elf_Word) <= Length) {
+        const size_t SubStart = SubOffset;
+        uint8_t Tag = Data[SubStart];
+        uint32_t Size = *reinterpret_cast<const Elf_Word *>(Data + SubStart + 1);
+        if (Size < 1 + sizeof(Elf_Word) || Size > Length - SubStart)
+          break;
+        const size_t SubEnd = SubStart + Size;
+        SubOffset = SubEnd;
+        if (Tag != ARMBuildAttrs::File)
+          continue;
+
+        // File scope attributes: (ULEB128 tag, value) pairs up to SubEnd.
+        size_t Pos = SubStart + 1 + sizeof(Elf_Word);
+        while (Pos < SubEnd) {
+          unsigned TagLen;
+          uint64_t AttrTag = decodeULEB128(Data + Pos, &TagLen);
+          Pos += TagLen;
+          if (Pos >= SubEnd)
+            break;
+          if (AttrTag == ARMBuildAttrs::CPU_name) {
+            // String valued attribute; look for the terminator only inside
+            // this sub-subsection.
+            const char *Str = reinterpret_cast<const char *>(Data + Pos);
+            size_t StrLen = StringRef(Str, SubEnd - Pos).find('\0');
+            if (StrLen == StringRef::npos)
+              break;
+            CpuStr = StringRef(Str, StrLen);
+            Pos += StrLen + 1;
+            continue;
+          }
+          // NOTE(review): every other tag is treated as ULEB128 valued, as
+          // before; other string valued tags are not recognised here.
+          unsigned ValueLen;
+          uint64_t Value = decodeULEB128(Data + Pos, &ValueLen);
+          Pos += ValueLen;
+          if (Pos > SubEnd)
+            break;
+          if (AttrTag == ARMBuildAttrs::ARM_ISA_use) {
+            CheckedArm = true;
+            // Value 0 means ARM code is not permitted, so the object is
+            // still Thumb only.
+            if (Value != 0)
+              ThumbOnly = false;
+          } else if (AttrTag == ARMBuildAttrs::THUMB_ISA_use) {
+            CheckedThumb = true;
+          }
+          // Everything of interest has been seen; stop early.
+          if (CheckedArm && CheckedThumb && CpuStr.size() > 0)
+            return CpuStr.lower();
+        }
+      }
+    }
+  }
+  return CpuStr.lower();
+}
+
+static
std::string getCPU(const ObjectFile *Obj, bool &ThumbOnly) { + if (MCPU.size()) + return MCPU; + + if (Obj->isELF()) { + if (Obj->getArch() == Triple::thumb || Obj->getArch() == Triple::thumbeb || + Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb) { + if (const ELF32LEObjectFile *ELFObj = dyn_cast(Obj)) + return getArmCpuDetails(ELFObj->getELFFile(), ThumbOnly); + + // Big-endian 32-bit + if (const ELF32BEObjectFile *ELFObj = dyn_cast(Obj)) + return getArmCpuDetails(ELFObj->getELFFile(), ThumbOnly); + + return MCPU; + } + } + return ""; +} +struct DecoderContext { + std::unique_ptr MRI; + std::unique_ptr AsmInfo; + std::unique_ptr STI; + std::unique_ptr MII; + std::unique_ptr MOFI; + std::unique_ptr Ctx; + std::unique_ptr DisAsm; + std::unique_ptr MIA; + std::unique_ptr IP; + PrettyPrinter *PIP; + std::string TargetName; + + DecoderContext() + : MRI(nullptr), AsmInfo(nullptr), STI(nullptr), MII(nullptr), + MOFI(nullptr), Ctx(nullptr), DisAsm(nullptr), MIA(nullptr), IP(nullptr), + PIP(nullptr) {} + + void setContext(const Target *TheTarget, const ObjectFile *Obj, + std::string TripleName, SubtargetFeatures &Features) { + TargetName = TheTarget->getName(); + bool ThumbOnly = false; + std::string CPU = getCPU(Obj, ThumbOnly); + // A CPU with thumb only architecture cannot have ARM Subtarget + if (TargetName == "arm" && ThumbOnly) + CPU = ""; + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) + report_fatal_error("error: no register info for target " + TripleName); + + // Set up disassembler. 
+ AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName)); + if (!AsmInfo) + report_fatal_error("error: no assembly info for target " + TripleName); + STI.reset(TheTarget->createMCSubtargetInfo(TripleName, CPU, + Features.getString())); + if (!STI) + report_fatal_error("error: no subtarget info for target " + TripleName); + MII.reset(TheTarget->createMCInstrInfo()); + if (!MII) + report_fatal_error("error: no instruction info for target " + TripleName); + MOFI.reset(new MCObjectFileInfo); + Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get())); + + DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx)); + if (!DisAsm) + report_fatal_error("error: no disassembler for target " + TripleName); + + MIA.reset(TheTarget->createMCInstrAnalysis(MII.get())); + + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); + IP.reset(TheTarget->createMCInstPrinter( + Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); + if (!IP) + report_fatal_error("error: no instruction printer for target " + + TripleName); + IP->setPrintImmHex(PrintImmHex); + PIP = selectPrettyPrinter(Triple(TripleName)); + } +}; static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (StartAddress > StopAddress) error("Start address should be less than stop address"); + DecoderContext Primary, Secondary; const Target *TheTarget = getTarget(Obj); + bool ThumbMode = false; // Package up features to be passed to target/subtarget SubtargetFeatures Features = Obj->getFeatures(); @@ -1078,46 +1238,26 @@ for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); } + Primary.setContext(TheTarget, Obj, TripleName, Features); + if (isArmElf(Obj) && !isAarch64Elf(Obj)) { + // ARM ELF binaries may have mixed ARM and thumb code. There is no flag + // except the mapping symbols that mark these boundaries. 
Create a second + // disassembler based on what was the first(inferred by MCPU or using + // triple) + SubtargetFeatures Features; + std::string Name = TripleName == "thumb" ? "arm" : "thumb"; + llvm::Triple SecondaryTriple("unknown-unknown-unknown"); + SecondaryTriple.setTriple(Triple::normalize(Name)); + std::string Error; + const Target *SecondaryTarget = + TargetRegistry::lookupTarget("", SecondaryTriple, Error); + if (!TheTarget) + report_fatal_error("can't find target: " + Error); + Secondary.setContext(SecondaryTarget, Obj, SecondaryTriple.str(), Features); + } - std::unique_ptr MRI( - TheTarget->createMCRegInfo(TripleName)); - if (!MRI) - report_fatal_error("error: no register info for target " + TripleName); - - // Set up disassembler. - std::unique_ptr AsmInfo( - TheTarget->createMCAsmInfo(*MRI, TripleName)); - if (!AsmInfo) - report_fatal_error("error: no assembly info for target " + TripleName); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); - if (!STI) - report_fatal_error("error: no subtarget info for target " + TripleName); - std::unique_ptr MII(TheTarget->createMCInstrInfo()); - if (!MII) - report_fatal_error("error: no instruction info for target " + TripleName); - std::unique_ptr MOFI(new MCObjectFileInfo); - MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); - - std::unique_ptr DisAsm( - TheTarget->createMCDisassembler(*STI, Ctx)); - if (!DisAsm) - report_fatal_error("error: no disassembler for target " + TripleName); - - std::unique_ptr MIA( - TheTarget->createMCInstrAnalysis(MII.get())); - - int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); - std::unique_ptr IP(TheTarget->createMCInstPrinter( - Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); - if (!IP) - report_fatal_error("error: no instruction printer for target " + - TripleName); - IP->setPrintImmHex(PrintImmHex); - PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); - - StringRef Fmt = 
Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " : - "\t\t\t%08" PRIx64 ": "; + StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " + : "\t\t\t%08" PRIx64 ": "; SourcePrinter SP(Obj, TheTarget->getName()); @@ -1209,24 +1349,28 @@ // Get the list of all the symbols in this section. SectionSymbolsTy &Symbols = AllSymbols[Section]; std::vector DataMappingSymsAddr; - std::vector TextMappingSymsAddr; - if (isArmElf(Obj)) { + std::vector ThumbMappingSymsAddr; + std::vector ArmMappingSymsAddr; + std::vector AArch64MappingSymsAddr; + if (isArmElf(Obj) || isAarch64Elf(Obj)) { for (const auto &Symb : Symbols) { uint64_t Address = std::get<0>(Symb); StringRef Name = std::get<1>(Symb); if (Name.startswith("$d")) DataMappingSymsAddr.push_back(Address - SectionAddr); if (Name.startswith("$x")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + AArch64MappingSymsAddr.push_back(Address - SectionAddr); if (Name.startswith("$a")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + ArmMappingSymsAddr.push_back(Address - SectionAddr); if (Name.startswith("$t")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + ThumbMappingSymsAddr.push_back(Address - SectionAddr); } } std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end()); - std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end()); + std::sort(ArmMappingSymsAddr.begin(), ArmMappingSymsAddr.end()); + std::sort(ThumbMappingSymsAddr.begin(), ThumbMappingSymsAddr.end()); + std::sort(AArch64MappingSymsAddr.begin(), AArch64MappingSymsAddr.end()); // Make a list of all the relocations for this section. std::vector Rels; @@ -1343,8 +1487,8 @@ // same section. We rely on the markers introduced to // understand what we need to dump. 
If the data marker is within a // function, it is denoted as a word/short etc - if (isArmElf(Obj) && std::get<2>(Symbols[si]) != ELF::STT_OBJECT && - !DisassembleAll) { + if ((isArmElf(Obj) || isAarch64Elf(Obj)) && + std::get<2>(Symbols[si]) != ELF::STT_OBJECT && !DisassembleAll) { uint64_t Stride = 0; auto DAI = std::lower_bound(DataMappingSymsAddr.begin(), @@ -1395,9 +1539,17 @@ } Index += Stride; outs() << "\n"; - auto TAI = std::lower_bound(TextMappingSymsAddr.begin(), - TextMappingSymsAddr.end(), Index); - if (TAI != TextMappingSymsAddr.end() && *TAI == Index) + auto TAI = std::lower_bound(ThumbMappingSymsAddr.begin(), + ThumbMappingSymsAddr.end(), Index); + auto ARMI = std::lower_bound(ArmMappingSymsAddr.begin(), + ArmMappingSymsAddr.end(), Index); + auto AArchI = + std::lower_bound(AArch64MappingSymsAddr.begin(), + AArch64MappingSymsAddr.end(), Index); + + if ((TAI != ThumbMappingSymsAddr.end() && *TAI == Index) || + (ARMI != ArmMappingSymsAddr.end() && *ARMI == Index) || + (AArchI != AArch64MappingSymsAddr.end() && *AArchI == Index)) break; } } @@ -1448,22 +1600,54 @@ if (Index >= End) break; + DecoderContext *Disassembler = nullptr; + if (isArmElf(Obj) && !isAarch64Elf(Obj)) { + auto TAI = std::lower_bound(ThumbMappingSymsAddr.begin(), + ThumbMappingSymsAddr.end(), Index); + auto ARMI = std::lower_bound(ArmMappingSymsAddr.begin(), + ArmMappingSymsAddr.end(), Index); + if (TAI != ThumbMappingSymsAddr.end() && *TAI == Index) + ThumbMode = true; + else if (ARMI != ArmMappingSymsAddr.end() && *ARMI == Index) + ThumbMode = false; + + if (ThumbMode) + Disassembler = + Primary.TargetName == "thumb" ? &Primary : &Secondary; + else + Disassembler = Primary.TargetName == "arm" ? 
&Primary : &Secondary; + } else { + Disassembler = &Primary; + } // Disassemble a real instruction or a data when disassemble all is // provided - bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), - SectionAddr + Index, DebugOut, - CommentStream); + bool Disassembled = Disassembler->DisAsm->getInstruction( + Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut, + CommentStream); + // If ARM, try another disassembler if the first oen failed + if (!Disassembled && isArmElf(Obj)) { + if (Disassembler == &Primary && Secondary.DisAsm) + Disassembler = &Secondary; + else if (Disassembler == &Secondary && Primary.DisAsm) + Disassembler = &Primary; + if (Disassembler->DisAsm) + Disassembled = Disassembler->DisAsm->getInstruction( + Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut, + CommentStream); + } if (Size == 0) Size = 1; - PIP.printInst(*IP, Disassembled ? &Inst : nullptr, - Bytes.slice(Index, Size), SectionAddr + Index, outs(), "", - *STI, &SP); + Disassembler->PIP->printInst( + *Disassembler->IP, Disassembled ? &Inst : nullptr, + Bytes.slice(Index, Size), SectionAddr + Index, outs(), "", + *Disassembler->STI, &SP); outs() << CommentStream.str(); Comments.clear(); // Try to resolve the target of a call, tail call, etc. to a specific // symbol. + auto MIA = Disassembler->MIA.get(); if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || MIA->isConditionalBranch(Inst))) { uint64_t Target;