diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -184,6 +184,7 @@ void applyAdrpAdrp(const OptimizationHint &); void applyAdrpLdr(const OptimizationHint &); void applyAdrpLdrGot(const OptimizationHint &); + void applyAdrpAddLdr(const OptimizationHint &); void applyAdrpLdrGotLdr(const OptimizationHint &); private: @@ -467,80 +468,109 @@ applyAdrpLdr(hint); } -// Relaxes a GOT-indirect load. -// If the referenced symbol is external and its GOT entry is within +/- 1 MiB, -// the GOT entry can be loaded with a single literal ldr instruction. -// If the referenced symbol is local, its address may be loaded directly if it's -// close enough, or with an adr(p) + ldr pair if it's not. -void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) { +// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's +// address by loading directly if it's close enough, or to an adr(p)+ldr +// sequence if it's not. 
+// +// adrp x0, _foo@PAGE +// add x1, x0, _foo@PAGEOFF +// ldr x2, [x1, #off] +void OptimizationHintContext::applyAdrpAddLdr(const OptimizationHint &hint) { uint32_t ins1 = read32le(buf + hint.offset0); Adrp adrp; if (!parseAdrp(ins1, adrp)) return; + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Add add; + if (!parseAdd(ins2, add)) + return; uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]); - Ldr ldr3; - if (!parseLdr(ins3, ldr3)) + Ldr ldr; + if (!parseLdr(ins3, ldr)) return; - uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); - Ldr ldr2; - Add add2; Optional rel1 = findPrimaryReloc(hint.offset0); Optional rel2 = findReloc(hint.offset0 + hint.delta[0]); if (!rel1 || !rel2) return; - if (parseAdd(ins2, add2)) { - // adrp x0, _foo@PAGE - // add x1, x0, _foo@PAGEOFF - // ldr x2, [x1, #off] + if (adrp.destRegister != add.srcRegister) + return; + if (add.destRegister != ldr.baseRegister) + return; - if (adrp.destRegister != add2.srcRegister) - return; - if (add2.destRegister != ldr3.baseRegister) - return; + // Load from the target address directly. + // nop + // nop + // ldr x2, [_foo + #off] + uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA(); + Ldr literalLdr = ldr; + literalLdr.offset += rel1->referentVA - rel3VA; + if (isLiteralLdrEligible(literalLdr)) { + writeNop(buf + hint.offset0); + writeNop(buf + hint.offset0 + hint.delta[0]); + writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr); + return; + } - // Load from the target address directly. 
- // nop - // nop - // ldr x2, [_foo + #off] - uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA(); - Ldr literalLdr = ldr3; - literalLdr.offset += rel1->referentVA - rel3VA; - if (isLiteralLdrEligible(literalLdr)) { - writeNop(buf + hint.offset0); - writeNop(buf + hint.offset0 + hint.delta[0]); - writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr); - return; - } + // Load the target address into a register and load from there indirectly. + // adr x1, _foo + // nop + // ldr x2, [x1, #off] + int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA(); + if (isValidAdrOffset(adrOffset)) { + writeAdr(buf + hint.offset0, ldr.baseRegister, adrOffset); + // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but + // not for AdrpLdrGotLdr. Its effect is the same either way. + writeNop(buf + hint.offset0 + hint.delta[0]); + return; + } - // Load the target address into a register and load from there indirectly. - // adr x1, _foo - // nop - // ldr x2, [x1, #off] - int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA(); - if (isValidAdrOffset(adrOffset)) { - writeAdr(buf + hint.offset0, ldr3.baseRegister, adrOffset); - writeNop(buf + hint.offset0 + hint.delta[0]); - return; - } + // Move the target's page offset into the ldr's immediate offset. + // adrp x0, _foo@PAGE + // nop + // ldr x2, [x0, _foo@PAGEOFF + #off] + Ldr immediateLdr = ldr; + immediateLdr.baseRegister = adrp.destRegister; + immediateLdr.offset += add.addend; + if (isImmediateLdrEligible(immediateLdr)) { + writeNop(buf + hint.offset0 + hint.delta[0]); + writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr); + return; + } +} - // Move the target's page offset into the ldr's immediate offset. 
- // adrp x0, _foo@PAGE - // nop - // ldr x2, [x0, _foo@PAGEOFF + #off] - Ldr immediateLdr = ldr3; - immediateLdr.baseRegister = adrp.destRegister; - immediateLdr.offset += add2.addend; - if (isImmediateLdrEligible(immediateLdr)) { - writeNop(buf + hint.offset0 + hint.delta[0]); - writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr); - return; - } +// Relaxes a GOT-indirect load. +// If the referenced symbol is external and its GOT entry is within +/- 1 MiB, +// the GOT entry can be loaded with a single literal ldr instruction. +// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr, +// we perform the AdrpAddLdr transformation. +void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) { + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Add add; + Ldr ldr2; + + if (parseAdd(ins2, add)) { + applyAdrpAddLdr(hint); } else if (parseLdr(ins2, ldr2)) { // adrp x1, _foo@GOTPAGE // ldr x2, [x1, _foo@GOTPAGEOFF] // ldr x3, [x2, #off] + + uint32_t ins1 = read32le(buf + hint.offset0); + Adrp adrp; + if (!parseAdrp(ins1, adrp)) + return; + uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]); + Ldr ldr3; + if (!parseLdr(ins3, ldr3)) + return; + + Optional rel1 = findPrimaryReloc(hint.offset0); + Optional rel2 = findReloc(hint.offset0 + hint.delta[0]); + if (!rel1 || !rel2) + return; + if (ldr2.baseRegister != adrp.destRegister) return; if (ldr3.baseRegister != ldr2.destRegister) @@ -581,7 +611,7 @@ ctx1.applyAdrpLdr(hint); break; case LOH_ARM64_ADRP_ADD_LDR: - // TODO: Implement this + ctx1.applyAdrpAddLdr(hint); break; case LOH_ARM64_ADRP_LDR_GOT_LDR: ctx1.applyAdrpLdrGotLdr(hint); diff --git a/lld/test/MachO/loh-adrp-add-ldr.s b/lld/test/MachO/loh-adrp-add-ldr.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/loh-adrp-add-ldr.s @@ -0,0 +1,185 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o %t +# RUN: 
llvm-objdump -d --macho %t | FileCheck %s + +## This is mostly a copy of loh-adrp-ldr-got-ldr.s's `local.s` test, except that Adrp+Ldr+Ldr +## triples have been changed to Adrp+Add+Ldr. The performed optimization is the same. +.text +.align 2 +.globl _main +_main: + +### Transformation to a literal LDR +## Basic case +L1: adrp x0, _close@PAGE +L2: add x1, x0, _close@PAGEOFF +L3: ldr x2, [x1] +# CHECK-LABEL: _main: +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +## Load with offset +L4: adrp x0, _close@PAGE +L5: add x1, x0, _close@PAGEOFF +L6: ldr x2, [x1, #8] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +## 32 bit load +L7: adrp x0, _close@PAGE +L8: add x1, x0, _close@PAGEOFF +L9: ldr w1, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr w1, _close + +## Floating point +L10: adrp x0, _close@PAGE +L11: add x1, x0, _close@PAGEOFF +L12: ldr s1, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr s1, _close + +L13: adrp x0, _close@PAGE +L14: add x1, x0, _close@PAGEOFF +L15: ldr d1, [x1, #8] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr d1, _close8 + +L16: adrp x0, _close@PAGE +L17: add x1, x0, _close@PAGEOFF +L18: ldr q0, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr q0, _close + + +### Transformation to ADR+LDR +## 1 byte floating point load +L19: adrp x0, _close@PAGE +L20: add x1, x0, _close@PAGEOFF +L21: ldr b2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr b2, [x1] + +## 1 byte GPR load, zero extend +L22: adrp x0, _close@PAGE +L23: add x1, x0, _close@PAGEOFF +L24: ldrb w2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldrb w2, [x1] + +## 1 byte GPR load, sign extend +L25: adrp x0, _close@PAGE +L26: add x1, x0, _close@PAGEOFF +L27: ldrsb x2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldrsb x2, [x1] + +## Unaligned +L28: adrp x0, _unaligned@PAGE +L29: add x1, x0, _close@PAGEOFF +L30: ldr x2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop 
+# CHECK-NEXT: ldr x2, [x1] + + +### Transformation to ADRP + immediate LDR +## Basic test: target is far +L31: adrp x0, _far@PAGE +L32: add x1, x0, _far@PAGEOFF +L33: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +## With offset +L34: adrp x0, _far@PAGE +L35: add x1, x0, _far@PAGEOFF +L36: ldr x2, [x1, #8] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +### No changes +## Far and unaligned +L37: adrp x0, _far_unaligned@PAGE +L38: add x1, x0, _far_unaligned@PAGEOFF +L39: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x1] + +## Far with large offset (_far_offset@PAGEOFF + #255 > 4095) +L40: adrp x0, _far_offset@PAGE +L41: add x1, x0, _far_offset@PAGEOFF +L42: ldrb w2, [x1, #255] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldrb w2, [x1, #255] + +### Invalid inputs; the instructions should be left untouched. +## Registers don't match +L43: adrp x0, _far@PAGE +L44: add x1, x0, _far@PAGEOFF +L45: ldr x2, [x2] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x2] + +## Targets don't match +L46: adrp x0, _close@PAGE +L47: add x1, x0, _close8@PAGEOFF +L48: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x1] + +.data +.align 4 + .quad 0 +_close: + .quad 0 +_close8: + .quad 0 + .byte 0 +_unaligned: + .quad 0 + +.space 1048576 +.align 12 + .quad 0 +_far: + .quad 0 + .byte 0 +_far_unaligned: + .quad 0 +.space 4000 +_far_offset: + .byte 0 + +.loh AdrpAddLdr L1, L2, L3 +.loh AdrpAddLdr L4, L5, L6 +.loh AdrpAddLdr L7, L8, L9 +.loh AdrpAddLdr L10, L11, L12 +.loh AdrpAddLdr L13, L14, L15 +.loh AdrpAddLdr L16, L17, L18 +.loh AdrpAddLdr L19, L20, L21 +.loh AdrpAddLdr L22, L23, L24 +.loh AdrpAddLdr L25, L26, L27 +.loh AdrpAddLdr L28, L29, L30 +.loh AdrpAddLdr L31, L32, L33 +.loh AdrpAddLdr L34, L35, L36 +.loh AdrpAddLdr L37, L38, L39 +.loh AdrpAddLdr L40, L41, L42 +.loh AdrpAddLdr L43, L44, L45