diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -163,6 +163,21 @@
   uint32_t addend;
 };
 
+// How a load widens the value it reads. For sign-extending loads the
+// enumerators equal the instruction's 2-bit opc field (bits 23:22), so
+// parseLdr can cast opc directly.
+enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };
+
+// Decoded fields of an LDR-family (unsigned immediate offset) instruction.
+struct Ldr {
+  uint8_t destRegister;
+  uint8_t baseRegister;
+  uint8_t size; // access size in bytes
+  bool isFloat; // true for SIMD&FP loads
+  ExtendType extendType;
+  uint64_t offset; // byte offset: imm12 scaled by size
+};
+
 struct PerformedReloc {
   const Reloc &rel;
   uint64_t referentVA;
@@ -177,6 +192,7 @@
 
   void applyAdrpAdd(const OptimizationHint &);
   void applyAdrpAdrp(const OptimizationHint &);
+  void applyAdrpLdr(const OptimizationHint &);
 
 private:
   uint8_t *buf;
@@ -207,6 +223,49 @@
   return true;
 }
 
+// Decodes an LDR-family instruction with an unsigned immediate offset into
+// `ldr`. Returns false if `insn` is not one of the recognized load encodings;
+// in that case `ldr` may have been partially written.
+static bool parseLdr(uint32_t insn, Ldr &ldr) {
+  ldr.destRegister = insn & 0x1f;
+  ldr.baseRegister = (insn >> 5) & 0x1f;
+  uint8_t size = insn >> 30;
+  uint8_t opc = (insn >> 22) & 3;
+
+  if ((insn & 0x3fc00000) == 0x39400000) {
+    // LDR (immediate), LDRB (immediate), LDRH (immediate)
+    ldr.size = 1 << size;
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f800000) == 0x39800000) {
+    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
+    // With size == 3 this bit pattern is PRFM (immediate) or unallocated, and
+    // size == 2 with opc == 3 is unallocated; none of these is a load that
+    // may be rewritten.
+    if (size == 3 || (size == 2 && opc == 3))
+      return false;
+    ldr.size = 1 << size;
+    ldr.extendType = static_cast<ExtendType>(opc);
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f400000) == 0x3d400000) {
+    // LDR (immediate, SIMD&FP)
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = true;
+    if (size == 2 && opc == 1)
+      ldr.size = 4;
+    else if (size == 3 && opc == 1)
+      ldr.size = 8;
+    else if (size == 0 && opc == 3)
+      ldr.size = 16;
+    else
+      return false;
+  } else {
+    return false;
+  }
+  ldr.offset = ((insn >> 10) & 0xfff) * ldr.size;
+  return true;
+}
+
 static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
   uint32_t opcode = 0x10000000;
   uint32_t immHi = (delta & 0x001ffffc) << 3;
@@ -216,6 +275,34 @@
 
 static void writeNop(void *loc) { write32le(loc, 0xd503201f); }
 
+// Writes an LDR (literal) at `loc` that keeps the destination register, access
+// size and extension of `original` and loads from `delta` bytes away. Only
+// bits [20:2] of `delta` are encoded, so the caller must ensure it is a
+// multiple of 4 within +/- 1 MiB. Only 4-, 8- and 16-byte loads are handled.
+static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) {
+  // Mask before shifting, as writeAdr does: left-shifting a negative value is
+  // undefined behavior prior to C++20.
+  uint32_t imm19 = (delta & 0x001ffffc) << 3;
+  uint32_t opcode = 0;
+  switch (original.size) {
+  case 4:
+    if (original.isFloat)
+      opcode = 0x1c000000;
+    else
+      opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000;
+    break;
+  case 8:
+    opcode = original.isFloat ? 0x5c000000 : 0x58000000;
+    break;
+  case 16:
+    opcode = 0x9c000000;
+    break;
+  default:
+    assert(false && "Invalid size for literal ldr");
+  }
+  write32le(loc, opcode | imm19 | original.destRegister);
+}
+
 uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) {
   size_t relocIdx = &reloc - isec->relocs.data();
   return relocTargets[relocIdx];
@@ -316,6 +403,49 @@
   writeNop(buf + hint.offset0 + hint.delta[0]);
 }
 
+// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
+// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
+// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
+//
+//   adrp xN, _foo@PAGE
+//   ldr  xM, [xN, _foo@PAGEOFF]
+// ->
+//   nop
+//   ldr  xM, _foo
+void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) {
+  uint32_t ins1 = read32le(buf + hint.offset0);
+  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+  Adrp adrp;
+  if (!parseAdrp(ins1, adrp))
+    return;
+  Ldr ldr;
+  if (!parseLdr(ins2, ldr))
+    return;
+  if (adrp.destRegister != ldr.baseRegister)
+    return;
+
+  Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
+  Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
+  if (!rel1 || !rel2)
+    return;
+  // The ldr's immediate must equal the page offset of the adrp's referent;
+  // otherwise the two instructions do not address the same location.
+  if (ldr.offset != (rel1->referentVA & 0xfff))
+    return;
+  if ((rel1->referentVA & 3) != 0)
+    return;
+  // writeLiteralLdr has no encoding for byte and halfword loads.
+  if (ldr.size == 1 || ldr.size == 2)
+    return;
+  // PC-relative distance from the ldr instruction to the referent.
+  int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA();
+  if (delta >= (1 << 20) || delta < -(1 << 20))
+    return;
+
+  writeNop(buf + hint.offset0);
+  writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta);
+}
+
 void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
                                    ArrayRef<uint64_t> relocTargets) const {
   assert(isec);
@@ -332,6 +462,8 @@
       // might cause its targets to be turned into NOPs.
       break;
     case LOH_ARM64_ADRP_LDR:
+      ctx1.applyAdrpLdr(hint);
+      break;
     case LOH_ARM64_ADRP_ADD_LDR:
     case LOH_ARM64_ADRP_LDR_GOT_LDR:
     case LOH_ARM64_ADRP_ADD_STR:
diff --git a/lld/test/MachO/loh-adrp-ldr.s b/lld/test/MachO/loh-adrp-ldr.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-ldr.s
@@ -0,0 +1,149 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+.text
+.align 2
+_before_far:
+  .space 1048576
+
+.align 2
+_before_near:
+  .quad 0
+
+.globl _main
+# CHECK-LABEL: _main:
+_main:
+## Out of range, before
+L1: adrp x0, _before_far@PAGE
+L2: ldr x0, [x0, _before_far@PAGEOFF]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: ldr x0
+
+## In range, before
+L3: adrp x1, _before_near@PAGE
+L4: ldr x1, [x1, _before_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x1, #-20
+
+## Registers don't match (invalid input)
+L5: adrp x2, _before_near@PAGE
+L6: ldr x3, [x3, _before_near@PAGEOFF]
+# CHECK-NEXT: adrp x2
+# CHECK-NEXT: ldr x3
+
+## Targets don't match (invalid input)
+L7: adrp x4, _before_near@PAGE
+L8: ldr x4, [x4, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x4
+# CHECK-NEXT: ldr x4
+
+## Not an adrp instruction
+L9: udf 0
+L10: ldr x5, [x5, _after_near@PAGEOFF]
+# CHECK-NEXT: udf
+# CHECK-NEXT: ldr x5
+
+## Not an ldr with an immediate offset
+L11: adrp x6, _after_near@PAGE
+L12: ldr x6, 0
+# CHECK-NEXT: adrp x6
+# CHECK-NEXT: ldr x6, #0
+
+## Target is not aligned to 4 bytes
+L13: adrp x7, _after_unaligned@PAGE
+L14: ldr x7, [x7, _after_unaligned@PAGEOFF]
+# CHECK-NEXT: adrp x7
+# CHECK-NEXT: ldr x7
+
+## Byte load, unsupported
+L15: adrp x8, _after_near@PAGE
+L16: ldr b8, [x8, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x8
+# CHECK-NEXT: ldr b8
+
+## Halfword load, unsupported
+L17: adrp x9, _after_near@PAGE
+L18: ldr h9, [x9, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x9
+# CHECK-NEXT: ldr h9
+
+## Word load
+L19: adrp x10, _after_near@PAGE
+L20: ldr w10, [x10, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w10, _after_near
+
+## With addend
+L21: adrp x11, _after_near@PAGE + 8
+L22: ldr x11, [x11, _after_near@PAGEOFF + 8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x11
+
+## Signed 32-bit read from 8-bit value, unsupported
+L23: adrp x12, _after_near@PAGE
+L24: ldrsb w12, [x12, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x12
+# CHECK-NEXT: ldrsb w12
+
+## 64-bit load from signed 32-bit value
+L25: adrp x13, _after_near@PAGE
+L26: ldrsw x13, [x13, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsw x13, _after_near
+
+## Single precision FP read
+L27: adrp x14, _after_near@PAGE
+L28: ldr s0, [x14, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s0, _after_near
+
+## Double precision FP read
+L29: adrp x15, _after_near@PAGE
+L30: ldr d0, [x15, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d0, _after_near
+
+## Quad precision FP read
+L31: adrp x16, _after_near@PAGE
+L32: ldr q0, [x16, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _after_near
+
+## Out of range, after
+L33: adrp x17, _after_far@PAGE
+L34: ldr x17, [x17, _after_far@PAGEOFF]
+# CHECK-NEXT: adrp x17
+# CHECK-NEXT: ldr x17
+
+.data
+.align 4
+_after_near:
+  .quad 0
+  .quad 0
+  .byte 0
+_after_unaligned:
+.space 1048575
+
+_after_far:
+  .quad 0
+
+.loh AdrpLdr L1, L2
+.loh AdrpLdr L3, L4
+.loh AdrpLdr L5, L6
+.loh AdrpLdr L7, L8
+.loh AdrpLdr L9, L10
+.loh AdrpLdr L11, L12
+.loh AdrpLdr L13, L14
+.loh AdrpLdr L15, L16
+.loh AdrpLdr L17, L18
+.loh AdrpLdr L19, L20
+.loh AdrpLdr L21, L22
+.loh AdrpLdr L23, L24
+.loh AdrpLdr L25, L26
+.loh AdrpLdr L27, L28
+.loh AdrpLdr L29, L30
+.loh AdrpLdr L31, L32
+.loh AdrpLdr L33, L34