diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -435,6 +435,7 @@ EF_ARM_ABI_FLOAT_SOFT = 0x00000200U, // EABI_VER5 EF_ARM_VFP_FLOAT = 0x00000400U, // Legacy pre EABI_VER5 EF_ARM_ABI_FLOAT_HARD = 0x00000400U, // EABI_VER5 + EF_ARM_BE8 = 0x00800000U, EF_ARM_EABI_UNKNOWN = 0x00000000U, EF_ARM_EABI_VER1 = 0x01000000U, EF_ARM_EABI_VER2 = 0x02000000U, diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -424,6 +424,7 @@ BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK); BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK); BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK); + BCaseMask(EF_ARM_BE8, EF_ARM_BE8); break; case ELF::EM_MIPS: BCase(EF_MIPS_NOREORDER); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -729,6 +729,24 @@ "HardenSlsNoComdat", "true", "Generate thunk code for SLS mitigation in the normal text section">; +//===----------------------------------------------------------------------===// +// Endianness of instruction encodings in memory. +// +// In the current Arm architecture, this is usually little-endian regardless of +// data endianness. But before Armv7 it was typical for instruction endianness +// to match data endianness, so that a big-endian system was consistently big- +// endian. And Armv7-R can be configured to use big-endian instructions. +// +// Additionally, even when targeting Armv7-A, big-endian instructions can be +// found in relocatable object files, because the Arm ABI specifies that the +// linker byte-reverses them depending on the target architecture. +// +// So we have a feature here to indicate that instructions are stored big- +// endian, which you can set when instantiating an MCDisassembler. 
+def ModeBigEndianInstructions : SubtargetFeature<"big-endian-instructions", + "BigEndianInstructions", "true", + "Expect instructions to be stored big-endian.">; + //===----------------------------------------------------------------------===// // ARM Processor subtarget features. // diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -131,6 +131,9 @@ public: ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { + InstructionEndianness = STI.getFeatureBits()[ARM::ModeBigEndianInstructions] + ? llvm::support::big + : llvm::support::little; } ~ARMDisassembler() override = default; @@ -156,6 +159,8 @@ DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const; + + llvm::support::endianness InstructionEndianness; }; } // end anonymous namespace @@ -765,7 +770,8 @@ if (Bytes.size() < 2) return 2; - uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0]; + uint16_t Insn16 = llvm::support::endian::read( + Bytes.data(), InstructionEndianness); return Insn16 < 0xE800 ? 2 : 4; } @@ -794,9 +800,9 @@ return MCDisassembler::Fail; } - // Encoded as a small-endian 32-bit word in the stream. - uint32_t Insn = - (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); + // Encoded as a 32-bit word in the stream. + uint32_t Insn = llvm::support::endian::read(Bytes.data(), + InstructionEndianness); // Calling the auto-generated decoder function. 
DecodeStatus Result = @@ -1084,7 +1090,8 @@ return MCDisassembler::Fail; } - uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0]; + uint16_t Insn16 = llvm::support::endian::read( + Bytes.data(), InstructionEndianness); DecodeStatus Result = decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -1138,7 +1145,8 @@ } uint32_t Insn32 = - (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16); + (uint32_t(Insn16) << 16) | llvm::support::endian::read( + Bytes.data() + 2, InstructionEndianness); Result = decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI); diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/be-disasm.test b/llvm/test/tools/llvm-objdump/ELF/ARM/be-disasm.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/be-disasm.test @@ -0,0 +1,91 @@ +# RUN: yaml2obj --docnum=1 -DCONTENT=FA000002E59F100CE0800001E12FFF1E4802EB00308047703141592627182818 %s | llvm-objdump -d --triple=armv7r - | FileCheck %s +# RUN: yaml2obj --docnum=1 -DCONTENT=020000FA0C109FE5010080E01EFF2FE1024800EB803070473141592627182818 -DFLAG=,EF_ARM_BE8 %s | llvm-objdump -d --triple=armv7r - | FileCheck %s +# RUN: yaml2obj --docnum=2 -DCONTENT=FA000002E59F100CE0800001E12FFF1E4802EB00308047703141592627182818 %s | llvm-objdump -d --triple=armv7r - | FileCheck %s + +## Test llvm-objdump disassembly of all three kinds of +## AAELF32-compliant big-endian ELF file. +## +## In image files, by default AArch32 ELF stores the instructions +## big-endian ('BE32' style), unless the EF_ARM_BE8 flag is set in the +## ELF header, which indicates that instructions are stored +## little-endian ('BE8' style). llvm-objdump should detect the flag and +## handle both types, using the $a, $t and $d mapping symbols to +## distinguish Arm instructions, Thumb instructions, and data. +## +## Relocatable object files always use the BE32 style. 
(The linker is +## expected to byte-swap code sections, using the same mapping +## symbols to decide how, if it's going to generate an image with BE8 +## instruction endianness and the BE8 flag set.) +## +## This test checks all three cases of this. It provides llvm-objdump +## with the BE32 and BE8 versions of the same image file, with the code +## section byte-swapped, and the EF_ARM_BE8 flag absent and present +## respectively to indicate that. We also provide a matching object +## file. We expect the identical disassembly from both, apart from the +## detail that addresses in the ELF images start at 0x8000 and section +## offsets in the object start at 0. + +# CHECK: 0: fa000002 blx +# CHECK-NEXT: 4: e59f100c ldr r1, [pc, #12] +# CHECK-NEXT: 8: e0800001 add r0, r0, r1 +# CHECK-NEXT: c: e12fff1e bx lr +# CHECK: 10: 4802 ldr r0, [pc, #8] +# CHECK-NEXT: 12: eb00 3080 add.w r0, r0, r0, lsl #14 +# CHECK-NEXT: 16: 4770 bx lr +# CHECK: 18: 31 41 59 26 .word 0x31415926 +# CHECK-NEXT: 1c: 27 18 28 18 .word 0x27182818 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2MSB + Type: ET_EXEC + Machine: EM_ARM + Flags: [ EF_ARM_EABI_UNKNOWN[[FLAG=]] ] + Entry: 0x8000 +ProgramHeaders: + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .text + LastSec: .text + VAddr: 0x8000 + Align: 0x4 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x8000 + AddressAlign: 0x4 + Content: [[CONTENT]] +Symbols: + - Name: '$a' + Section: .text + Value: 0x8000 + - Name: '$t' + Section: .text + Value: 0x8010 + - Name: '$d' + Section: .text + Value: 0x8018 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2MSB + Type: ET_REL + Machine: EM_ARM +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x4 + Content: [[CONTENT]] +Symbols: + - Name: '$a' + Section: .text + - Name: '$t' + Section: .text + Value: 0x10 + - Name: '$d' + Section: .text + Value: 0x18 diff --git 
a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -690,14 +690,14 @@ OS << ' ' << format_hex_no_prefix( llvm::support::endian::read( - Bytes.data() + Pos, llvm::support::little), + Bytes.data() + Pos, InstructionEndianness), 4); } else { for (; Pos + 4 <= End; Pos += 4) OS << ' ' << format_hex_no_prefix( llvm::support::endian::read( - Bytes.data() + Pos, llvm::support::little), + Bytes.data() + Pos, InstructionEndianness), 8); } if (Pos < End) { @@ -713,6 +713,13 @@ } else OS << "\t"; } + + void setInstructionEndianness(llvm::support::endianness Endianness) { + InstructionEndianness = Endianness; + } + +private: + llvm::support::endianness InstructionEndianness = llvm::support::little; }; ARMPrettyPrinter ARMPrettyPrinterInst; @@ -1852,6 +1859,29 @@ if (MCPU.empty()) MCPU = Obj->tryGetCPUName().value_or("").str(); + if (isArmElf(*Obj)) { + // When disassembling big-endian Arm ELF, the instruction endianness is + // determined in a complex way. In relocatable objects, AAELF32 mandates + // that instruction endianness matches the ELF file endianness; in + // executable images, that's true unless the file header has the EF_ARM_BE8 + // flag, in which case instructions are little-endian regardless of data + // endianness. + // + // We must set the big-endian-instructions SubtargetFeature to make the + // disassembler read the instructions the right way round, and also tell + // our own prettyprinter to retrieve the encodings the same way to print in + // hex. 
+ const auto *Elf32BE = dyn_cast(Obj); + + if (Elf32BE && (Elf32BE->isRelocatableObject() || + !(Elf32BE->getPlatformFlags() & ELF::EF_ARM_BE8))) { + Features.AddFeature("+big-endian-instructions"); + ARMPrettyPrinterInst.setInstructionEndianness(llvm::support::big); + } else { + ARMPrettyPrinterInst.setInstructionEndianness(llvm::support::little); + } + } + std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); if (!STI)