Index: lld/MachO/Arch/ARM64.cpp
===================================================================
--- lld/MachO/Arch/ARM64.cpp
+++ lld/MachO/Arch/ARM64.cpp
@@ -36,6 +36,9 @@
                             uint64_t entryAddr) const override;
   const RelocAttrs &getRelocAttrs(uint8_t type) const override;
   void populateThunk(InputSection *thunk, Symbol *funcSym) override;
+  void applyOptimizationHints(
+      uint8_t *buf, const std::vector<OptimizationHint> &,
+      const DenseMap<uint8_t *, PerformedReloc> &) const override;
 };
 
 } // namespace
@@ -150,6 +153,140 @@
   stubHelperEntrySize = sizeof(stubHelperEntryCode);
 }
 
+namespace {
+struct ADRP {
+  uint32_t destRegister;
+};
+
+// Decodes `insn` as an ADRP. Returns false if it is some other instruction.
+static bool parseADRP(uint32_t insn, ADRP &adrp) {
+  if ((insn & 0x9f000000) != 0x90000000)
+    return false;
+  adrp.destRegister = insn & 0x1f;
+  return true;
+}
+
+struct ADD {
+  uint8_t destRegister;
+  uint8_t srcRegister;
+  uint32_t addend;
+};
+
+// Decodes `insn` as a 64-bit `add Xd, Xn, #imm12` with no shift. Returns false
+// for anything else.
+static bool parseADD(uint32_t insn, ADD &add) {
+  if ((insn & 0xffc00000) != 0x91000000)
+    return false;
+  add.destRegister = insn & 0x1f;
+  add.srcRegister = (insn >> 5) & 0x1f;
+  add.addend = (insn >> 10) & 0xfff;
+  return true;
+}
+
+// Writes `adr x<dest>, <pc + delta>` to `loc`. `delta` must fit in the signed
+// 21-bit immediate, which is split into immlo (bits 30:29) and immhi (23:5).
+static void writeADR(void *loc, uint32_t dest, int32_t delta) {
+  uint32_t instruction = 0x10000000;
+  uint32_t immHi = (delta & 0x001ffffc) << 3;
+  uint32_t immLo = (delta & 0x00000003) << 29;
+  write32le(loc, instruction | immHi | immLo | dest);
+}
+
+static void writeNOP(void *loc) { write32le(loc, 0xd503201f); }
+
+// Transforms
+//   adrp xM, _foo@PAGE
+//   add  xN, xM, _foo@PAGEOFF
+// into
+//   adr  xN, _foo
+//   nop
+// when _foo is within ADR's +/-1 MiB reach of the first instruction.
+static void applyAdrpAdd(uint8_t *buf, const OptimizationHint &hint,
+                         const DenseMap<uint8_t *, PerformedReloc> &relocs) {
+  uint32_t ins1 = read32le(buf + hint.offsets[0]);
+  uint32_t ins2 = read32le(buf + hint.offsets[1]);
+  ADRP adrp;
+  if (!parseADRP(ins1, adrp))
+    return;
+  ADD add;
+  if (!parseADD(ins2, add))
+    return;
+  if (adrp.destRegister != add.srcRegister)
+    return;
+
+  auto rel1 = relocs.find(buf + hint.offsets[0]);
+  auto rel2 = relocs.find(buf + hint.offsets[1]);
+  if (rel1 == relocs.end() || rel2 == relocs.end())
+    return;
+  if (rel1->second.referentVA != rel2->second.referentVA)
+    return;
+  int64_t delta = rel1->second.referentVA - rel1->second.relocVA;
+  // ADR takes a signed 21-bit byte offset, i.e. [-2^20, 2^20).
+  if (!isInt<21>(delta))
+    return;
+
+  writeADR(buf + hint.offsets[0], add.destRegister, delta);
+  writeNOP(buf + hint.offsets[1]);
+}
+
+// Turns the second of two ADRPs that write the same register and target the
+// same 4 KiB page into a NOP.
+static void applyAdrpAdrp(uint8_t *buf, const OptimizationHint &hint,
+                          const DenseMap<uint8_t *, PerformedReloc> &relocs) {
+  uint32_t ins1 = read32le(buf + hint.offsets[0]);
+  uint32_t ins2 = read32le(buf + hint.offsets[1]);
+  ADRP adrp1, adrp2;
+  if (!parseADRP(ins1, adrp1) || !parseADRP(ins2, adrp2))
+    return;
+  if (adrp1.destRegister != adrp2.destRegister)
+    return;
+
+  auto rel1 = relocs.find(buf + hint.offsets[0]);
+  auto rel2 = relocs.find(buf + hint.offsets[1]);
+  if (rel1 == relocs.end() || rel2 == relocs.end())
+    return;
+  if ((rel1->second.referentVA & ~4095ULL) !=
+      (rel2->second.referentVA & ~4095ULL))
+    return;
+
+  writeNOP(buf + hint.offsets[1]);
+}
+} // namespace
+
+void ARM64::applyOptimizationHints(
+    uint8_t *buf, const std::vector<OptimizationHint> &hints,
+    const DenseMap<uint8_t *, PerformedReloc> &relocs) const {
+  // Note: Some of these optimizations might not be valid when shared regions
+  // are in use. Will need to revisit this if splitSegInfo is added.
+
+  for (const OptimizationHint &hint : hints) {
+    switch (hint.type) {
+    case LOH_ARM64_ADRP_ADRP:
+      // This is done in another pass because the other optimization hints
+      // might cause its targets to be turned into NOPs.
+      break;
+    case LOH_ARM64_ADRP_LDR:
+    case LOH_ARM64_ADRP_ADD_LDR:
+    case LOH_ARM64_ADRP_LDR_GOT_LDR:
+    case LOH_ARM64_ADRP_ADD_STR:
+    case LOH_ARM64_ADRP_LDR_GOT_STR:
+      // TODO: Implement these
+      break;
+    case LOH_ARM64_ADRP_ADD:
+      applyAdrpAdd(buf, hint, relocs);
+      break;
+    case LOH_ARM64_ADRP_LDR_GOT:
+      // TODO: Implement this as well
+      break;
+    }
+  }
+
+  for (const OptimizationHint &hint : hints) {
+    if (hint.type == LOH_ARM64_ADRP_ADRP)
+      applyAdrpAdrp(buf, hint, relocs);
+  }
+}
+
 TargetInfo *macho::createARM64TargetInfo() {
   static ARM64 t;
   return &t;
Index: lld/MachO/Config.h
===================================================================
--- lld/MachO/Config.h
+++ lld/MachO/Config.h
@@ -130,6 +130,7 @@
   bool dedupLiterals = true;
   bool omitDebugInfo = false;
   bool warnDylibInstallName = false;
+  bool ignoreOptimizationHints = false;
   // Temporary config flag that will be removed once we have fully implemented
   // support for __eh_frame.
   bool parseEhFrames = false;
Index: lld/MachO/Driver.cpp
===================================================================
--- lld/MachO/Driver.cpp
+++ lld/MachO/Driver.cpp
@@ -1300,6 +1300,7 @@
       config->icfLevel != ICFLevel::none;
   config->warnDylibInstallName = args.hasFlag(
       OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
+  config->ignoreOptimizationHints = args.hasArg(OPT_ignore_optimization_hints);
   config->callGraphProfileSort = args.hasFlag(
       OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
   config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
Index: lld/MachO/InputFiles.h
===================================================================
--- lld/MachO/InputFiles.h
+++ lld/MachO/InputFiles.h
@@ -179,6 +179,7 @@
   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                         const SectionHeader &, Section &);
   void parseDebugInfo();
+  void parseOptimizationHints(ArrayRef<uint8_t> data);
   void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
   void registerCompactUnwind(Section &compactUnwindSection);
   void registerEhFrames(Section &ehFrameSection);
Index: lld/MachO/InputFiles.cpp
===================================================================
--- lld/MachO/InputFiles.cpp
+++ lld/MachO/InputFiles.cpp
@@ -65,6 +65,7 @@
 #include "llvm/LTO/LTO.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
@@ -449,6 +450,97 @@
   return *it;
 }
 
+void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) {
+  // Number of addresses a hint of the given kind carries, or -1 if the kind is
+  // not one we recognize.
+  auto expectedArgCount = [](uint64_t type) {
+    switch (type) {
+    case LOH_ARM64_ADRP_ADRP:
+    case LOH_ARM64_ADRP_LDR:
+    case LOH_ARM64_ADRP_ADD:
+    case LOH_ARM64_ADRP_LDR_GOT:
+      return 2;
+    case LOH_ARM64_ADRP_ADD_LDR:
+    case LOH_ARM64_ADRP_ADD_STR:
+    case LOH_ARM64_ADRP_LDR_GOT_LDR:
+    case LOH_ARM64_ADRP_LDR_GOT_STR:
+      return 3;
+    }
+    return -1;
+  };
+
+  const uint8_t *p = data.begin();
+  bool malformed = false;
+  // Decodes the ULEB128 value at `p` and steps past it. Values are kept at
+  // full width: narrowing to uint8_t would let an out-of-range kind alias a
+  // valid one (or the terminator), and Twine(uint8_t) prints a character.
+  auto readULEB = [&]() {
+    unsigned int n = 0;
+    const char *decodeError = nullptr;
+    uint64_t value = decodeULEB128(p, &n, data.end(), &decodeError);
+    malformed |= decodeError != nullptr;
+    p += n;
+    return value;
+  };
+
+  while (p < data.end()) {
+    const ptrdiff_t inputOffset = p - data.begin();
+    uint64_t type = readULEB();
+
+    // An entry of type 0 terminates the list.
+    if (type == 0)
+      break;
+
+    uint64_t argCount = readULEB();
+    int expectedCount = expectedArgCount(type);
+    if (expectedCount == -1) {
+      warn("Linker optimization hint at offset " + Twine(inputOffset) +
+           " has unknown type " + Twine(type));
+      // Skip its arguments and carry on with the next hint.
+      for (uint64_t i = 0; i < argCount && p < data.end(); ++i)
+        readULEB();
+      continue;
+    }
+    if (argCount != static_cast<uint64_t>(expectedCount)) {
+      error("Linker optimization hint at offset " + Twine(inputOffset) +
+            " has " + Twine(argCount) +
+            " arguments instead of the expected " + Twine(expectedCount));
+      return;
+    }
+
+    uint64_t address[3] = {0, 0, 0};
+    uint64_t minAddr = UINT64_MAX, maxAddr = 0;
+    for (uint64_t i = 0; i < argCount; ++i) {
+      address[i] = readULEB();
+      minAddr = std::min(minAddr, address[i]);
+      maxAddr = std::max(maxAddr, address[i]);
+    }
+    if (malformed) {
+      error("Linker optimization hint at offset " + Twine(inputOffset) +
+            " is truncated or malformed");
+      return;
+    }
+
+    uint64_t offset = minAddr;
+    auto *section = findContainingSection(sections, &offset);
+    auto *subsec = findContainingSubsection(*section, &offset);
+
+    uint64_t sectionOffset = minAddr - offset;
+    if (maxAddr - sectionOffset >= subsec->getSize()) {
+      error("Linker optimization hint at offset " + Twine(inputOffset) +
+            " spans multiple sections.");
+      return;
+    }
+
+    // Only the first argCount offsets are meaningful; the rest stay zero
+    // instead of being computed from an address that was never read.
+    OptimizationHint hint{static_cast<uint8_t>(type), {0, 0, 0}};
+    for (uint64_t i = 0; i < argCount; ++i)
+      hint.offsets[i] = address[i] - sectionOffset;
+    subsec->optimizationHints.push_back(hint);
+  }
+}
+
 template <class SectionHeader>
 static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
                                    relocation_info rel) {
@@ -949,6 +1041,11 @@
     if (!sections[i]->subsections.empty())
       parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
 
+  if (!config->ignoreOptimizationHints)
+    if (auto *cmd = findCommand<linkedit_data_command>(
+            hdr, LC_LINKER_OPTIMIZATION_HINT))
+      parseOptimizationHints({buf + cmd->dataoff, cmd->datasize});
+
   parseDebugInfo();
 
   Section *ehFrameSection = nullptr;
Index: lld/MachO/InputSection.h
===================================================================
--- lld/MachO/InputSection.h
+++ lld/MachO/InputSection.h
@@ -79,6 +79,7 @@
   OutputSection *parent = nullptr;
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
+  std::vector<OptimizationHint> optimizationHints;
   // The symbols that belong to this InputSection, sorted by value. With
   // .subsections_via_symbols, there is typically only one element here.
   llvm::TinyPtrVector<Defined *> symbols;
Index: lld/MachO/InputSection.cpp
===================================================================
--- lld/MachO/InputSection.cpp
+++ lld/MachO/InputSection.cpp
@@ -30,7 +30,9 @@
 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
 // so account for that.
 static_assert(sizeof(void *) != 8 ||
-                  sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
+                  sizeof(ConcatInputSection) ==
+                      sizeof(std::vector<Reloc>) +
+                          sizeof(std::vector<OptimizationHint>) + 88,
               "Try to minimize ConcatInputSection's size, we create many "
               "instances of it");
 
@@ -123,6 +125,12 @@
 
   memcpy(buf, data.data(), data.size());
 
+  // Relocations are only recorded when there are hints that will consume them.
+  const bool applyHints = target->cpuType == CPU_TYPE_ARM64 &&
+                          !config->ignoreOptimizationHints &&
+                          !optimizationHints.empty();
+  DenseMap<uint8_t *, PerformedReloc> performedRelocs;
+
   for (size_t i = 0; i < relocs.size(); i++) {
     const Reloc &r = relocs[i];
     uint8_t *loc = buf + r.offset;
@@ -158,7 +166,13 @@
       referentVA = referentIsec->getVA(r.addend);
     }
     target->relocateOne(loc, r, referentVA, getVA() + r.offset);
+
+    if (applyHints)
+      performedRelocs.insert({loc, {r, referentVA, getVA() + r.offset}});
   }
+
+  if (applyHints)
+    target->applyOptimizationHints(buf, optimizationHints, performedRelocs);
 }
 
 ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,
Index: lld/MachO/Options.td
===================================================================
--- lld/MachO/Options.td
+++ lld/MachO/Options.td
@@ -1254,8 +1254,7 @@
     Flags<[HelpHidden]>,
     Group<grp_undocumented>;
 def ignore_optimization_hints : Flag<["-"], "ignore_optimization_hints">,
-    HelpText<"This option is undocumented in ld64">,
-    Flags<[HelpHidden]>,
+    HelpText<"Ignore Linker Optimization Hints">,
     Group<grp_undocumented>;
 def init_offsets : Flag<["-"], "init_offsets">,
     HelpText<"This option is undocumented in ld64">,
Index: lld/MachO/Relocations.h
===================================================================
--- lld/MachO/Relocations.h
+++ lld/MachO/Relocations.h
@@ -70,6 +70,25 @@
         addend(addend), referent(referent) {}
 };
 
+// A relocation that has been applied, along with the addresses involved.
+struct PerformedReloc {
+  // The relocation that was applied.
+  const Reloc &rel;
+  // The virtual address the relocation was resolved to.
+  uint64_t referentVA;
+  // The virtual address at which the relocation is found.
+  uint64_t relocVA;
+};
+
+// A Linker Optimization Hint attached to a subsection.
+struct OptimizationHint {
+  // One of the LOH_ARM64_* kinds.
+  uint8_t type;
+  // The offsets from the start of the subsection that this hint applies to.
+  // Entries beyond the number of arguments the hint kind takes are zero.
+  uint64_t offsets[3];
+};
+
 bool validateSymbolRelocation(const Symbol *, const InputSection *,
                               const Reloc &);
 
Index: lld/MachO/Target.h
===================================================================
--- lld/MachO/Target.h
+++ lld/MachO/Target.h
@@ -13,6 +13,7 @@
 #include "Relocations.h"
 
 #include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -28,6 +29,7 @@
 class Defined;
 class DylibSymbol;
 class InputSection;
+struct OptimizationHint;
 
 class TargetInfo {
 public:
@@ -78,6 +80,12 @@
 
   bool usesThunks() const { return thunkSize > 0; }
 
+  // Rewrites instruction sequences in `buf` as described by the given hints.
+  // The map gives the relocation applied at each relocated location in `buf`.
+  virtual void applyOptimizationHints(
+      uint8_t *, const std::vector<OptimizationHint> &,
+      const llvm::DenseMap<uint8_t *, PerformedReloc> &) const {}
+
   uint32_t magic;
   llvm::MachO::CPUType cpuType;
   uint32_t cpuSubtype;
Index: lld/test/MachO/hint-adrp-add.s
===================================================================
--- /dev/null
+++ lld/test/MachO/hint-adrp-add.s
@@ -0,0 +1,32 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+# CHECK-LABEL: _main:
+# CHECK: adr x0
+# CHECK-NEXT: nop
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin -defsym=FAR=1 %s -o %t-far.o
+# RUN: %lld -arch arm64 %t-far.o -o %t-far
+# RUN: llvm-objdump -d --macho %t-far | FileCheck %s --check-prefix=FAR
+# FAR-LABEL: _main:
+# FAR: adrp x0
+# FAR-NOT: nop
+
+.text
+.align 2
+.globl _main
+_main:
+L1:
+  adrp x0, _foo@PAGE
+L2:
+  add x0, x0, _foo@PAGEOFF
+.loh AdrpAdd L1, L2
+
+.data
+.ifdef FAR
+.space 1048576
+.endif
+_foo:
+  .long 0
Index: llvm/include/llvm/BinaryFormat/MachO.h
===================================================================
--- llvm/include/llvm/BinaryFormat/MachO.h
+++ llvm/include/llvm/BinaryFormat/MachO.h
@@ -2237,6 +2237,18 @@
   kSecCodeSignatureHashSHA512 = 5, /* SHA-512 */
 };
 
+/// Kinds of Linker Optimization Hints found in LC_LINKER_OPTIMIZATION_HINT.
+enum LinkerOptimizationHintKind {
+  LOH_ARM64_ADRP_ADRP = 1,
+  LOH_ARM64_ADRP_LDR = 2,
+  LOH_ARM64_ADRP_ADD_LDR = 3,
+  LOH_ARM64_ADRP_LDR_GOT_LDR = 4,
+  LOH_ARM64_ADRP_ADD_STR = 5,
+  LOH_ARM64_ADRP_LDR_GOT_STR = 6,
+  LOH_ARM64_ADRP_ADD = 7,
+  LOH_ARM64_ADRP_LDR_GOT = 8,
+};
+
 } // end namespace MachO
 } // end namespace llvm