diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -25,8 +25,9 @@ struct X86_64 : TargetInfo { X86_64(); - uint64_t getImplicitAddend(MemoryBufferRef, const section_64 &, - const relocation_info &) const override; + bool isPairedReloc(const relocation_info) const override; + uint64_t getAddend(MemoryBufferRef, const section_64 &, const relocation_info, + const relocation_info) const override; void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override; void writeStub(uint8_t *buf, const macho::Symbol &) const override; @@ -68,8 +69,13 @@ fatal(msg); } -uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec, - const relocation_info &rel) const { +bool X86_64::isPairedReloc(const relocation_info rel) const { + return rel.r_type == X86_64_RELOC_SUBTRACTOR; +} + +uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec, + const relocation_info rel, + const relocation_info pairedRel) const { auto *buf = reinterpret_cast(mb.getBufferStart()); const uint8_t *loc = buf + sec.offset + rel.r_address; @@ -139,7 +145,7 @@ break; default: llvm_unreachable( - "getImplicitAddend should have flagged all unhandled relocation types"); + "getAddend should have flagged all unhandled relocation types"); } switch (r.length) { diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -206,31 +206,54 @@ void ObjFile::parseRelocations(const section_64 &sec, SubsectionMap &subsecMap) { auto *buf = reinterpret_cast(mb.getBufferStart()); - ArrayRef anyRelInfos( - reinterpret_cast(buf + sec.reloff), - sec.nreloc); - - for (const any_relocation_info &anyRelInfo : anyRelInfos) { - if (anyRelInfo.r_word0 & R_SCATTERED) + ArrayRef relInfos( + reinterpret_cast(buf + sec.reloff), sec.nreloc); + + for (size_t i = 0; i < relInfos.size(); i++) { + // Paired relocations serve as Mach-O's method for 
attaching a + // supplemental datum to a primary relocation record. ELF does not + // need them because the *_RELOC_RELA records contain the extra + // addend field, vs. *_RELOC_REL which omit the addend. + // + // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend, + // and the paired *_RELOC_UNSIGNED record holds the minuend. The + // datum for each is a symbolic address. The result is the runtime + // offset between two addresses. + // + // The ARM64_RELOC_ADDEND record holds the addend, and the paired + // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the + // base symbolic address. + // + // Note: X86 does not use *_RELOC_ADDEND because it can embed an + // addend into the instruction stream. On X86, a relocatable address + // field always occupies an entire contiguous sequence of byte(s), + // so there is no need to merge opcode bits with address + // bits. Therefore, it's easy and convenient to store addends in the + // instruction-stream bytes that would otherwise contain zeroes. By + // contrast, RISC ISAs such as ARM64 mix opcode bits with + // address bits so that bitwise arithmetic is necessary to extract + // and insert them. Storing addends in the instruction stream is + // possible, but inconvenient and more costly at runtime. + // NOTE(review): relInfos[++i] is read before the bounds assert runs. + const relocation_info pairedInfo = relInfos[i]; + const relocation_info relInfo = + target->isPairedReloc(pairedInfo) ? 
relInfos[++i] : pairedInfo; + assert(i < relInfos.size()); + if (relInfo.r_address & R_SCATTERED) fatal("TODO: Scattered relocations not supported"); - auto relInfo = reinterpret_cast(anyRelInfo); - Reloc r; r.type = relInfo.r_type; r.pcrel = relInfo.r_pcrel; r.length = relInfo.r_length; - uint64_t rawAddend = target->getImplicitAddend(mb, sec, relInfo); - + r.offset = relInfo.r_address; + // For unpaired relocs, pairedInfo (just a copy of relInfo) is ignored + uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo); if (relInfo.r_extern) { r.referent = symbols[relInfo.r_symbolnum]; r.addend = rawAddend; } else { - if (relInfo.r_symbolnum == 0 || relInfo.r_symbolnum > subsections.size()) - fatal("invalid section index in relocation for offset " + - std::to_string(r.offset) + " in section " + sec.sectname + - " of " + getName()); - + assert(relInfo.r_symbolnum && relInfo.r_symbolnum <= subsections.size()); SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; uint32_t referentOffset; @@ -250,7 +273,6 @@ r.addend = referentOffset; } - r.offset = relInfo.r_address; InputSection *subsec = findContainingSubsection(subsecMap, &r.offset); subsec->relocs.push_back(r); } diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -37,9 +37,11 @@ virtual ~TargetInfo() = default; // Validate the relocation structure and get its addend. 
- virtual uint64_t - getImplicitAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &, - const llvm::MachO::relocation_info &) const = 0; + virtual uint64_t getAddend(llvm::MemoryBufferRef, + const llvm::MachO::section_64 &, + const llvm::MachO::relocation_info, + const llvm::MachO::relocation_info) const = 0; + virtual bool isPairedReloc(const llvm::MachO::relocation_info) const = 0; virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0; // Write code for lazy binding. See the comments on StubsSection for more