Test inputs for "llvm-objdump -d" when data is placed inside a text section.

ARM case: main is followed in .text by a literal-pool word (.L3) and by the
myStr string object. The CHECK lines expect the pool entry to be shown as a
".word" directive and the string bytes as a hex/ASCII row.

Index: test/tools/llvm-objdump/ARM/disassemble-code-data-mix.s
===================================================================
--- /dev/null
+++ test/tools/llvm-objdump/ARM/disassemble-code-data-mix.s
@@ -0,0 +1,36 @@
+@RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+
+	.cpu arm7tdmi
+	.global myInt
+	.data
+	.align 2
+	.type myInt, %object
+	.size myInt, 4
+myInt:
+	.word 1
+	.text
+	.align 2
+	.global main
+	.type main, %function
+main:
+	str fp, [sp, #-4]!
+	add fp, sp, #0
+	ldr r3, .L3
+	ldr r3, [r3]
+	mov r0, r3
+	sub sp, fp, #0
+	ldr fp, [sp], #4
+	bx lr
+.L4:
+	.align 2
+.L3:
+	.word myInt
+	.size main, .-main
+	.global myStr
+	.type myStr, %object
+myStr:
+	.string "test string"
+
+
+@CHECK: .word 0x00000000
+@CHECK-DAG: 74 65 73 74 20 73 74 72 test str

x86-64 case: bar is an object symbol emitted into .text right after foo. The
CHECK lines expect its 12 bytes ("test string" plus the terminating NUL) as
two hex/ASCII rows, at offsets 0xb and 0x13.

Index: test/tools/llvm-objdump/X86/disassemble-code-data-mix.test
===================================================================
--- /dev/null
+++ test/tools/llvm-objdump/X86/disassemble-code-data-mix.test
@@ -0,0 +1,19 @@
+// RUN: llvm-mc %s -filetype=obj -triple=x86_64-pc-linux | llvm-objdump -d - | FileCheck %s
+	.text
+	.globl foo
+	.type foo, @function
+foo:
+	pushq %rbp
+	movq %rsp, %rbp
+	movl $0, %eax
+	popq %rbp
+	ret
+
+	.globl bar
+	.type bar, @object
+bar:
+	.string "test string"
+
+// CHECK: b: 74 65 73 74 20 73 74 72 test str
+// CHECK-NEXT: 13: 69 6e 67 00 ing.
+
llvm-objdump.cpp, part 1: add the isArmElf() helper (ELF objects whose arch is
aarch64, arm or thumb) and widen each per-section symbol record from an
(address, name) pair to an (address, name, symbol type) tuple.

Index: tools/llvm-objdump/llvm-objdump.cpp
===================================================================
--- tools/llvm-objdump/llvm-objdump.cpp
+++ tools/llvm-objdump/llvm-objdump.cpp
@@ -456,6 +456,12 @@
     OldLineInfo = LineInfo;
 }
 
+static bool isArmElf(const ObjectFile *Obj) {
+  return (Obj->isELF() &&
+          (Obj->getArch() == Triple::aarch64 || Obj->getArch() == Triple::arm ||
+           Obj->getArch() == Triple::thumb));
+}
+
 class PrettyPrinter {
 public:
   virtual ~PrettyPrinter(){}
@@ -1094,9 +1100,10 @@
       SectionRelocMap[*Sec2].push_back(Section);
   }
 
-  // Create a mapping from virtual address to symbol name.  This is used to
-  // pretty print the symbols while disassembling.
-  typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy;
+  // Create a mapping from virtual address to symbol name and symbol type.
+  // This is used to pretty print the symbols while disassembling.
+  typedef std::vector<std::tuple<uint64_t, StringRef, SymbolRef::Type>>
+      SectionSymbolsTy;
   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
   for (const SymbolRef &Symbol : Obj->symbols()) {
     Expected<uint64_t> AddressOrErr = Symbol.getAddress();
@@ -1114,7 +1121,10 @@
     if (SecI == Obj->section_end())
       continue;
 
-    AllSymbols[*SecI].emplace_back(Address, *Name);
+    Expected<SymbolRef::Type> TypeOrErr = Symbol.getType();
+    error(errorToErrorCode(TypeOrErr.takeError()));
+
+    AllSymbols[*SecI].emplace_back(std::make_tuple(Address, *Name, *TypeOrErr));
   }
 
   // Create a mapping from virtual address to section.
@@ -1146,7 +1156,8 @@
         Sec = SectionAddresses.end();
 
       if (Sec != SectionAddresses.end())
-        AllSymbols[Sec->second].emplace_back(VA, Name);
+        AllSymbols[Sec->second].emplace_back(
+            std::make_tuple(VA, Name, SymbolRef::ST_Unknown));
     }
   }
 
@@ -1168,14 +1179,18 @@
     SectionSymbolsTy &Symbols = AllSymbols[Section];
     std::vector<uint64_t> DataMappingSymsAddr;
     std::vector<uint64_t> TextMappingSymsAddr;
-    if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+    if (isArmElf(Obj)) {
       for (const auto &Symb : Symbols) {
-        uint64_t Address = Symb.first;
-        StringRef Name = Symb.second;
+        uint64_t Address = std::get<0>(Symb);
+        StringRef Name = std::get<1>(Symb);
         if (Name.startswith("$d"))
           DataMappingSymsAddr.push_back(Address - SectionAddr);
         if (Name.startswith("$x"))
           TextMappingSymsAddr.push_back(Address - SectionAddr);
+        if (Name.startswith("$a"))
+          TextMappingSymsAddr.push_back(Address - SectionAddr);
+        if (Name.startswith("$t"))
+          TextMappingSymsAddr.push_back(Address - SectionAddr);
       }
     }
 
@@ -1208,8 +1223,11 @@
     outs() << name << ':';
 
     // If the section has no symbol at the start, just insert a dummy one.
-    if (Symbols.empty() || Symbols[0].first != 0)
-      Symbols.insert(Symbols.begin(), std::make_pair(SectionAddr, name));
+    if (Symbols.empty() || std::get<0>(Symbols[0]) != 0)
+      Symbols.insert(Symbols.begin(),
+                     std::make_tuple(SectionAddr, name, Section.isText()
+                                         ? SymbolRef::ST_Function
+                                         : SymbolRef::ST_Data));
 
     SmallString<40> Comments;
     raw_svector_ostream CommentStream(Comments);
@@ -1227,11 +1245,12 @@
     // Disassemble symbol by symbol.
     for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
 
-      uint64_t Start = Symbols[si].first - SectionAddr;
+      uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr;
       // The end is either the section end or the beginning of the next
       // symbol.
-      uint64_t End =
-          (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr;
+      uint64_t End = (si == se - 1)
+                         ? SectSize
+                         : std::get<0>(Symbols[si + 1]) - SectionAddr;
       // Don't try to disassemble beyond the end of section contents.
       if (End > SectSize)
         End = SectSize;
@@ -1251,7 +1270,7 @@
           End -= 4;
       }
 
-      outs() << '\n' << Symbols[si].second << ":\n";
+      outs() << '\n' << std::get<1>(Symbols[si]) << ":\n";
 
 #ifndef NDEBUG
       raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
@@ -1264,8 +1283,10 @@
 
         // AArch64 ELF binaries can interleave data and text in the
         // same section. We rely on the markers introduced to
-        // understand what we need to dump.
-        if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+        // understand what we need to dump. Inside a non-data symbol, such data
+        // is printed as a .word/.short/.byte directive followed by its value.
+        if (isArmElf(Obj) && std::get<2>(Symbols[si]) != SymbolRef::ST_Data &&
+            !DisassembleAll) {
           uint64_t Stride = 0;
 
           auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
@@ -1278,15 +1299,38 @@
               if (Index + 4 <= End) {
                 Stride = 4;
                 dumpBytes(Bytes.slice(Index, 4), outs());
-                outs() << "\t.word";
+                outs() << "\t.word\t";
+                uint32_t Data = 0;
+                if (Obj->isLittleEndian())
+                  Data = Bytes.slice(Index, 4)[3] << 24 |
+                         Bytes.slice(Index, 4)[2] << 16 |
+                         Bytes.slice(Index, 4)[1] << 8 |
+                         Bytes.slice(Index, 4)[0];
+                else
+                  Data = Bytes.slice(Index, 4)[0] << 24 |
+                         Bytes.slice(Index, 4)[1] << 16 |
+                         Bytes.slice(Index, 4)[2] << 8 |
+                         Bytes.slice(Index, 4)[3];
+                outs() << "0x" << format("%08" PRIx32, Data);
+
               } else if (Index + 2 <= End) {
                 Stride = 2;
                 dumpBytes(Bytes.slice(Index, 2), outs());
-                outs() << "\t.short";
+                outs() << "\t\t.short\t";
+                uint16_t Data = 0;
+                if (Obj->isLittleEndian())
+                  Data =
+                      Bytes.slice(Index, 2)[1] << 8 | Bytes.slice(Index, 2)[0];
+                else
+                  Data =
+                      Bytes.slice(Index, 2)[0] << 8 | Bytes.slice(Index, 2)[1];
+                outs() << "0x" << format("%04" PRIx16, Data);
+
               } else {
                 Stride = 1;
                 dumpBytes(Bytes.slice(Index, 1), outs());
-                outs() << "\t.byte";
+                outs() << "\t\t.byte\t";
+                outs() << "0x" << format("%02" PRIx8, Bytes.slice(Index, 1)[0]);
               }
               Index += Stride;
               outs() << "\n";
@@ -1297,10 +1341,53 @@
             }
           }
         }
-
+        // A data symbol inside an ELF text section (on any architecture) must
+        // not be decoded as instructions when only text is being disassembled,
+        // i.e. without DisassembleAll. Dump its bytes as hex plus ASCII
+        // instead.
+        if (Obj->isELF() && std::get<2>(Symbols[si]) == SymbolRef::ST_Data &&
+            !DisassembleAll && Section.isText()) {
+          // Print up to 8 bytes per row: address, hex bytes, then ASCII.
+          uint8_t AsciiData[9] = {'\0'};
+          uint8_t Byte;
+          int NumBytes = 0;
+
+          for (Index = Start; Index < End; Index += 1) {
+            if (NumBytes == 0) {
+              outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+              outs() << "\t";
+            }
+            Byte = Bytes.slice(Index)[0];
+            outs() << format(" %02x", Byte);
+            if (isprint(Byte))
+              AsciiData[NumBytes] = Byte;
+            else
+              AsciiData[NumBytes] = '.';
+
+            uint8_t IndentOffset = 0;
+            NumBytes++;
+            if (Index == End - 1 || NumBytes > 8) {
+              // Last row, possibly short: pad the hex column by 3 columns per
+              // missing byte and clear only the unused tail of the ASCII text.
+              IndentOffset = 3 * (8 - NumBytes);
+              for (int Excess = NumBytes; Excess < 8; Excess++)
+                AsciiData[Excess] = '\0';
+              NumBytes = 8;
+            }
+            if (NumBytes == 8) {
+              AsciiData[8] = '\0';
+              outs() << std::string(IndentOffset, ' ') << "         ";
+              outs() << (char *)AsciiData;
+              outs() << '\n';
+              NumBytes = 0;
+            }
+          }
+        }
         if (Index >= End)
           break;
 
+        // Disassemble a real instruction, or data when DisassembleAll is
+        // set.
         bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
                                                    SectionAddr + Index, DebugOut,
                                                    CommentStream);
@@ -1347,10 +1434,10 @@
                 if (TargetSectionSymbols) {
                   auto TargetSym = std::upper_bound(
                       TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
-                      Target, [](uint64_t LHS,
-                                 const std::pair<uint64_t, StringRef> &RHS) {
-                        return LHS < RHS.first;
-                      });
+                      Target,
+                      [](uint64_t LHS,
+                         const std::tuple<uint64_t, StringRef, SymbolRef::Type>
+                             &RHS) { return LHS < std::get<0>(RHS); });
                   if (TargetSym != TargetSectionSymbols->begin()) {
                     --TargetSym;
                     uint64_t TargetAddress = std::get<0>(*TargetSym);