diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -25,98 +25,51 @@ struct X86_64 : TargetInfo { X86_64(); - bool isPairedReloc(relocation_info) const override; - uint64_t getAddend(MemoryBufferRef, const section_64 &, relocation_info, - relocation_info) const override; - void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override; + uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &, + const relocation_info) const override; + void relocateOne(uint8_t *loc, const Reloc &, uint64_t va, + uint64_t pc) const override; void writeStub(uint8_t *buf, const macho::Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const override; - void prepareSymbolRelocation(lld::macho::Symbol *, const InputSection *, - const Reloc &) override; - uint64_t resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &, - uint8_t type) const override; + void relaxGotLoad(uint8_t *loc, uint8_t type) const override; + const TargetInfo::RelocAttrs &getRelocAttrs(uint8_t type) const override; }; } // namespace -static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec, - relocation_info rel) { - return ("invalid relocation at offset " + std::to_string(rel.r_address) + - " of " + sec.segname + "," + sec.sectname + " in " + - mb.getBufferIdentifier()) - .str(); +const TargetInfo::RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const { + static const std::array relocAttrsArray{{ +#define B(x) RelocAttrBits::x + {"UNSIGNED", B(TLV) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(DYSYM8) | + B(BYTE4) | B(BYTE8)}, + {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, + {"SUBTRACTOR", B(SUBTRAHEND)}, + {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, +#undef B + }}; + assert(type >= 0 && type < relocAttrsArray.size() && + "invalid relocation type"); + if (type < 0 || type >= relocAttrsArray.size()) + return TargetInfo::invalidRelocAttrs; + return relocAttrsArray[type]; } -static void validateLength(MemoryBufferRef mb, const section_64 &sec, - relocation_info rel, - ArrayRef validLengths) { - if (find(validLengths, rel.r_length) != validLengths.end()) - return; - - std::string msg = getErrorLocation(mb, sec, rel) + ": relocations of type " + - std::to_string(rel.r_type) + " must have r_length of "; - bool first = true; - for (uint8_t length : validLengths) { - if (!first) - msg += " or "; - first = false; - msg += std::to_string(length); - } - fatal(msg); -} - -bool X86_64::isPairedReloc(relocation_info rel) const { - return rel.r_type == X86_64_RELOC_SUBTRACTOR; -} - -uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec, - relocation_info rel, - relocation_info pairedRel) const { +uint64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec, + relocation_info rel) const { auto *buf = reinterpret_cast(mb.getBufferStart()); const uint8_t *loc = buf + sec.offset + rel.r_address; - if (isThreadLocalVariables(sec.flags) && rel.r_type != X86_64_RELOC_UNSIGNED) - error("relocations in thread-local variable sections must be " - "X86_64_RELOC_UNSIGNED"); - - switch (rel.r_type) { - case X86_64_RELOC_BRANCH: - // XXX: ld64 also supports r_length = 0 here but I'm not sure when such a - // relocation will actually be generated. - validateLength(mb, sec, rel, {2}); - break; - case X86_64_RELOC_SIGNED: - case X86_64_RELOC_SIGNED_1: - case X86_64_RELOC_SIGNED_2: - case X86_64_RELOC_SIGNED_4: - case X86_64_RELOC_GOT_LOAD: - case X86_64_RELOC_GOT: - case X86_64_RELOC_TLV: - if (!rel.r_pcrel) - fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " + - std::to_string(rel.r_type) + " must be pcrel"); - validateLength(mb, sec, rel, {2}); - break; - case X86_64_RELOC_UNSIGNED: - if (rel.r_pcrel) - fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " + - std::to_string(rel.r_type) + " must not be pcrel"); - validateLength(mb, sec, rel, {2, 3}); - break; - default: - error("TODO: Unhandled relocation type " + std::to_string(rel.r_type)); - return 0; - } - switch (rel.r_length) { - case 0: - return *loc; - case 1: - return read16le(loc); case 2: return read32le(loc); case 3: @@ -126,40 +79,17 @@ } } -void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const { - switch (r.type) { - case X86_64_RELOC_BRANCH: - case X86_64_RELOC_SIGNED: - case X86_64_RELOC_SIGNED_1: - case X86_64_RELOC_SIGNED_2: - case X86_64_RELOC_SIGNED_4: - case X86_64_RELOC_GOT_LOAD: - case X86_64_RELOC_GOT: - case X86_64_RELOC_TLV: - // These types are only used for pc-relative relocations, so offset by 4 - // since the RIP has advanced by 4 at this point. This is only valid when - // r_length = 2, which is enforced by validateLength(). - val -= 4; - break; - case X86_64_RELOC_UNSIGNED: - break; - default: - llvm_unreachable( - "getAddend should have flagged all unhandled relocation types"); - } - +void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value, + uint64_t pc) const { + value += r.addend; + if (r.pcrel) + value -= (pc + 4); switch (r.length) { - case 0: - *loc = val; - break; - case 1: - write16le(loc, val); - break; case 2: - write32le(loc, val); + write32le(loc, value); break; case 3: - write64le(loc, val); + write64le(loc, value); break; default: llvm_unreachable("invalid r_length"); @@ -201,11 +131,6 @@ 0x90, // 0xf: nop }; -static constexpr uint8_t stubHelperEntry[] = { - 0x68, 0, 0, 0, 0, // 0x0: pushq - 0xe9, 0, 0, 0, 0, // 0x5: jmp <__stub_helper> -}; - void X86_64::writeStubHelperHeader(uint8_t *buf) const { memcpy(buf, stubHelperHeader, sizeof(stubHelperHeader)); writeRipRelative(buf, in.stubHelper->addr, 7, in.imageLoaderCache->getVA()); @@ -214,6 +139,11 @@ in.stubHelper->stubBinder->gotIndex * WordSize); } +static constexpr uint8_t stubHelperEntry[] = { + 0x68, 0, 0, 0, 0, // 0x0: pushq + 0xe9, 0, 0, 0, 0, // 0x5: jmp <__stub_helper> +}; + void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym, uint64_t entryAddr) const { memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry)); @@ -222,110 +152,11 @@ in.stubHelper->addr); } -void X86_64::prepareSymbolRelocation(lld::macho::Symbol *sym, - const InputSection *isec, const Reloc &r) { - switch (r.type) { - case X86_64_RELOC_GOT_LOAD: { - if (needsBinding(sym)) - in.got->addEntry(sym); - - if (sym->isTlv()) - error("found GOT relocation referencing thread-local variable in " + - toString(isec)); - break; - } - case X86_64_RELOC_GOT: { - in.got->addEntry(sym); - - if (sym->isTlv()) - error("found GOT relocation referencing thread-local variable in " + - toString(isec)); - break; - } - case X86_64_RELOC_BRANCH: { - prepareBranchTarget(sym); - break; - } - case X86_64_RELOC_UNSIGNED: { - if (auto *dysym = dyn_cast(sym)) { - if (r.length != 3) { - error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " + - dysym->getName() + " must have r_length = 3"); - return; - } - } - // References from thread-local variable sections are treated as offsets - // relative to the start of the referent section, and therefore have no - // need of rebase opcodes. - if (!(isThreadLocalVariables(isec->flags) && isa(sym))) - addNonLazyBindingEntries(sym, isec, r.offset, r.addend); - break; - } - case X86_64_RELOC_SIGNED: - case X86_64_RELOC_SIGNED_1: - case X86_64_RELOC_SIGNED_2: - case X86_64_RELOC_SIGNED_4: - // TODO: warn if they refer to a weak global - break; - case X86_64_RELOC_TLV: { - if (needsBinding(sym)) - in.tlvPointers->addEntry(sym); - - if (!sym->isTlv()) - error( - "found X86_64_RELOC_TLV referencing a non-thread-local variable in " + - toString(isec)); - break; - } - case X86_64_RELOC_SUBTRACTOR: - fatal("TODO: handle relocation type " + std::to_string(r.type)); - break; - default: - llvm_unreachable("unexpected relocation type"); - } -} - -uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym, - uint8_t type) const { - switch (type) { - case X86_64_RELOC_GOT_LOAD: { - if (!sym.isInGot()) { - if (buf[-2] != 0x8b) - error("X86_64_RELOC_GOT_LOAD must be used with movq instructions"); - buf[-2] = 0x8d; - return sym.getVA(); - } - LLVM_FALLTHROUGH; - } - case X86_64_RELOC_GOT: - return in.got->addr + sym.gotIndex * WordSize; - case X86_64_RELOC_BRANCH: { - if (sym.isInStubs()) - return in.stubs->addr + sym.stubsIndex * sizeof(stub); - return sym.getVA(); - } - case X86_64_RELOC_UNSIGNED: - case X86_64_RELOC_SIGNED: - case X86_64_RELOC_SIGNED_1: - case X86_64_RELOC_SIGNED_2: - case X86_64_RELOC_SIGNED_4: - return sym.getVA(); - case X86_64_RELOC_TLV: { - if (sym.isInGot()) - return in.tlvPointers->addr + sym.gotIndex * WordSize; - - // Convert the movq to a leaq. - assert(isa(&sym)); - if (buf[-2] != 0x8b) - error("X86_64_RELOC_TLV must be used with movq instructions"); - buf[-2] = 0x8d; - return sym.getVA(); - } - case X86_64_RELOC_SUBTRACTOR: - fatal("TODO: handle relocation type " + std::to_string(type)); - default: - llvm_unreachable("Unexpected relocation type"); - } +void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const { + // Convert MOVQ to LEAQ + if (loc[-2] != 0x8b) + error(getRelocAttrs(type).name + " reloc requires MOVQ instruction"); + loc[-2] = 0x8d; } X86_64::X86_64() { diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -310,11 +310,10 @@ break; case file_magic::macho_dynamically_linked_shared_lib: case file_magic::macho_dynamically_linked_shared_lib_stub: - case file_magic::tapi_file: { + case file_magic::tapi_file: if (Optional dylibFile = loadDylib(mbref)) newFile = *dylibFile; break; - } case file_magic::bitcode: newFile = make(mbref); break; @@ -786,13 +785,11 @@ case OPT_INPUT: addFile(arg->getValue(), false); break; - case OPT_weak_library: { - auto *dylibFile = - dyn_cast_or_null(addFile(arg->getValue(), false)); - if (dylibFile) + case OPT_weak_library: + if (auto *dylibFile = + dyn_cast_or_null(addFile(arg->getValue(), false))) dylibFile->forceWeakImport = true; break; - } case OPT_filelist: addFileList(arg->getValue()); break; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -203,6 +203,38 @@ return it->second; } +static bool validateRelocationInfo(MemoryBufferRef mb, const section_64 &sec, + relocation_info rel) { + const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type); + bool valid = true; + auto message = [relocAttrs, mb, sec, rel, &valid](const Twine &diagnostic) { + valid = false; + return (relocAttrs.name + " relocation " + diagnostic + " at offset " + + std::to_string(rel.r_address) + " of " + sec.segname + "," + + sec.sectname + " in " + mb.getBufferIdentifier()) + .str(); + }; + + if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern) + error(message("must be extern")); + if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel) + error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") + + "be PC-relative")); + if (isThreadLocalVariables(sec.flags) && + (!relocAttrs.hasAttr(RelocAttrBits::TLV) || + relocAttrs.hasAttr(RelocAttrBits::LOAD))) + error(message("not allowed in thread-local section, must be UNSIGNED")); + if (rel.r_length < 2 || rel.r_length > 3 || + !relocAttrs.hasAttr(static_cast(1 << rel.r_length))) { + static SmallVector widths{"INVALID", "4", "8", "4 or 8"}; + error(message("has width " + std::to_string(1 << rel.r_length) + + " bytes, but must be " + + widths[(static_cast(relocAttrs.bits) >> 2) & 3] + + " bytes")); + } + return valid; +} + void ObjFile::parseRelocations(const section_64 &sec, SubsectionMap &subsecMap) { auto *buf = reinterpret_cast(mb.getBufferStart()); @@ -217,8 +249,8 @@ // // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend, // and the paired *_RELOC_UNSIGNED record holds the minuend. The - // datum for each is a symbolic address. The result is the runtime - // offset between two addresses. + // datum for each is a symbolic address. The result is the offset + // between two addresses. // // The ARM64_RELOC_ADDEND record holds the addend, and the paired // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the @@ -235,23 +267,35 @@ // and insert them. Storing addends in the instruction stream is // possible, but inconvenient and more costly at link time. - relocation_info pairedInfo = relInfos[i]; - relocation_info relInfo = - target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo; + uint64_t pairedAddend = 0; + relocation_info relInfo = relInfos[i]; + if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) { + pairedAddend = SignExtend64<24>(relInfo.r_symbolnum); + relInfo = relInfos[++i]; + } assert(i < relInfos.size()); + if (!validateRelocationInfo(mb, sec, relInfo)) + continue; if (relInfo.r_address & R_SCATTERED) fatal("TODO: Scattered relocations not supported"); - + uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo); + assert(!(embeddedAddend && pairedAddend)); + uint64_t totalAddend = pairedAddend + embeddedAddend; + + Reloc p; + if (target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND)) { + p.type = relInfo.r_type; + p.referent = symbols[relInfo.r_symbolnum]; + relInfo = relInfos[++i]; + } Reloc r; r.type = relInfo.r_type; r.pcrel = relInfo.r_pcrel; r.length = relInfo.r_length; r.offset = relInfo.r_address; - // For unpaired relocs, pairdInfo (just a copy of relInfo) is ignored - uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo); if (relInfo.r_extern) { r.referent = symbols[relInfo.r_symbolnum]; - r.addend = rawAddend; + r.addend = totalAddend; } else { SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; @@ -263,16 +307,19 @@ // TODO: The offset of 4 is probably not right for ARM64, nor for // relocations with r_length != 2. referentOffset = - sec.addr + relInfo.r_address + 4 + rawAddend - referentSec.addr; + sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr; } else { // The addend for a non-pcrel relocation is its absolute address. - referentOffset = rawAddend - referentSec.addr; + referentOffset = totalAddend - referentSec.addr; } r.referent = findContainingSubsection(referentSubsecMap, &referentOffset); r.addend = referentOffset; } InputSection *subsec = findContainingSubsection(subsecMap, &r.offset); + if (p.type != GENERIC_RELOC_INVALID && + target->hasAttr(p.type, RelocAttrBits::SUBTRAHEND)) + subsec->relocs.push_back(p); subsec->relocs.push_back(r); } } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -23,16 +23,16 @@ class Symbol; struct Reloc { - uint8_t type; - bool pcrel; - uint8_t length; + uint8_t type = llvm::MachO::GENERIC_RELOC_INVALID; + bool pcrel = false; + uint8_t length = 0; // The offset from the start of the subsection that this relocation belongs // to. - uint32_t offset; + uint32_t offset = 0; // Adding this offset to the address of the referent symbol or subsection // gives the destination that this relocation refers to. - uint64_t addend; - llvm::PointerUnion referent; + uint64_t addend = 0; + llvm::PointerUnion referent = nullptr; }; class InputSection { diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -10,6 +10,7 @@ #include "InputFiles.h" #include "OutputSegment.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" #include "Writer.h" #include "lld/Common/Memory.h" @@ -33,17 +34,46 @@ uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } +static uint64_t resolveSymbolVA(uint8_t *loc, const lld::macho::Symbol &sym, + uint8_t type) { + const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(type); + if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { + if (sym.isInStubs()) + return in.stubs->addr + sym.stubsIndex * target->stubSize; + } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) { + if (sym.isInGot()) + return in.got->addr + sym.gotIndex * WordSize; + } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { + return in.got->addr + sym.gotIndex * WordSize; + } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) { + if (sym.isInGot()) + return in.tlvPointers->addr + sym.gotIndex * WordSize; + assert(isa(&sym)); + } + return sym.getVA(); +} + void InputSection::writeTo(uint8_t *buf) { if (getFileSize() == 0) return; memcpy(buf, data.data(), data.size()); - for (Reloc &r : relocs) { + for (size_t i = 0; i < relocs.size(); i++) { + const Reloc &r = relocs[i]; + uint8_t *loc = buf + r.offset; + auto *fromSym = target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND) + ? relocs[i++].referent.dyn_cast() + : nullptr; uint64_t referentVA = 0; - if (auto *referentSym = r.referent.dyn_cast()) { - referentVA = - target->resolveSymbolVA(buf + r.offset, *referentSym, r.type); + if (fromSym) { + auto *toSym = r.referent.dyn_cast(); + referentVA = toSym->getVA() - fromSym->getVA(); + } else if (auto *referentSym = r.referent.dyn_cast()) { + if (target->hasAttr(r.type, RelocAttrBits::LOAD) && + !referentSym->isInGot()) + target->relaxGotLoad(loc, r.type); + referentVA = resolveSymbolVA(loc, *referentSym, r.type); if (isThreadLocalVariables(flags)) { // References from thread-local variable sections are treated as offsets @@ -56,11 +86,7 @@ } else if (auto *referentIsec = r.referent.dyn_cast()) { referentVA = referentIsec->getVA(); } - - uint64_t referentVal = referentVA + r.addend; - if (r.pcrel) - referentVal -= getVA() + r.offset; - target->relocateOne(buf + r.offset, r, referentVal); + target->relocateOne(loc, r, referentVA, getVA() + r.offset); } } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -380,9 +380,7 @@ in.rebase->addEntry(section, offset); if (defined->isExternalWeakDef()) in.weakBinding->addEntry(sym, section, offset, addend); - } else if (isa(sym)) { - error("cannot bind to " + DSOHandle::name); - } else { + } else if (!isa(sym)) { // Undefined symbols are filtered out in scanRelocations(); we should never // get here llvm_unreachable("cannot bind to an undefined symbol"); diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -9,6 +9,7 @@ #ifndef LLD_MACHO_TARGET_H #define LLD_MACHO_TARGET_H +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/MemoryBuffer.h" @@ -17,6 +18,7 @@ namespace lld { namespace macho { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); class Symbol; class DylibSymbol; @@ -32,17 +34,41 @@ MaxAlignmentPowerOf2 = 32, }; +enum class RelocAttrBits { + _0 = 0, // invalid + PCREL = 1 << 0, // Value is PC-relative offset + ABSOLUTE = 1 << 1, // Value is an absolute address or fixed offset + BYTE4 = 1 << 2, // 4 byte datum + BYTE8 = 1 << 3, // 8 byte datum + EXTERN = 1 << 4, // Can have an external symbol + LOCAL = 1 << 5, // Can have a local symbol + ADDEND = 1 << 6, // *_ADDEND paired prefix reloc + SUBTRAHEND = 1 << 7, // *_SUBTRACTOR paired prefix reloc + BRANCH = 1 << 8, // Value is branch target + GOT = 1 << 9, // Pertains to Global Offset Table slots + TLV = 1 << 10, // Pertains to Thread-Local Variable slots + DYSYM8 = 1 << 11, // Requires DySym width to be 8 bytes + LOAD = 1 << 12, // Relaxable indirect load + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ LOAD), +}; + class TargetInfo { public: + struct RelocAttrs { + llvm::StringRef name; + RelocAttrBits bits; + bool hasAttr(RelocAttrBits b) const { return (bits & b) == b; } + }; + static const RelocAttrs invalidRelocAttrs; + virtual ~TargetInfo() = default; // Validate the relocation structure and get its addend. - virtual uint64_t getAddend(llvm::MemoryBufferRef, - const llvm::MachO::section_64 &, - llvm::MachO::relocation_info, - llvm::MachO::relocation_info) const = 0; - virtual bool isPairedReloc(llvm::MachO::relocation_info) const = 0; - virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0; + virtual uint64_t + getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &, + const llvm::MachO::relocation_info) const = 0; + virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va, + uint64_t pc) const = 0; // Write code for lazy binding. See the comments on StubsSection for more // details. @@ -56,10 +82,20 @@ // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those // entries. resolveSymbolVA() may also relax the target instructions to save // on a level of address indirection. - virtual void prepareSymbolRelocation(Symbol *, const InputSection *, - const Reloc &) = 0; - virtual uint64_t resolveSymbolVA(uint8_t *buf, const Symbol &, - uint8_t type) const = 0; + virtual void relaxGotLoad(uint8_t *loc, uint8_t type) const = 0; + + virtual const RelocAttrs &getRelocAttrs(uint8_t type) const = 0; + + bool hasAttr(uint8_t type, RelocAttrBits bit) const { + return getRelocAttrs(type).hasAttr(bit); + } + + bool validateRelocationInfo(llvm::MemoryBufferRef, + const llvm::MachO::section_64 &sec, + llvm::MachO::relocation_info); + bool validateSymbolRelocation(const Symbol *, const InputSection *isec, + const Reloc &); + void prepareSymbolRelocation(Symbol *, const InputSection *, const Reloc &); uint32_t cpuType; uint32_t cpuSubtype; diff --git a/lld/MachO/Target.cpp b/lld/MachO/Target.cpp --- a/lld/MachO/Target.cpp +++ b/lld/MachO/Target.cpp @@ -7,8 +7,42 @@ //===----------------------------------------------------------------------===// #include "Target.h" +#include "InputSection.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "lld/Common/ErrorHandler.h" + +using namespace llvm; +using namespace llvm::MachO; using namespace lld; using namespace lld::macho; +const TargetInfo::RelocAttrs TargetInfo::invalidRelocAttrs{"INVALID", + RelocAttrBits::_0}; + +bool TargetInfo::validateSymbolRelocation(const Symbol *sym, + const InputSection *isec, + const Reloc &r) { + const RelocAttrs &relocAttrs = getRelocAttrs(r.type); + bool valid = true; + auto message = [relocAttrs, sym, isec, &valid](const Twine &diagnostic) { + valid = false; + return (relocAttrs.name + " relocation " + diagnostic + " for `" + + sym->getName() + "' in " + toString(isec)) + .str(); + }; + + if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD) != + sym->isTlv()) + error(message(Twine("requires that variable ") + + (sym->isTlv() ? "not " : "") + "be thread-local")); + if (relocAttrs.hasAttr(RelocAttrBits::DYSYM8) && isa(sym) && + r.length != 3) + error(message("has width " + std::to_string(1 << r.length) + + " bytes, but must be 8 bytes")); + + return valid; +} + TargetInfo *macho::target = nullptr; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -402,6 +402,29 @@ } // namespace +static void prepareSymbolRelocation(lld::macho::Symbol *sym, + const InputSection *isec, const Reloc &r) { + const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); + + if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) { + prepareBranchTarget(sym); + } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) { + if (needsBinding(sym)) + in.got->addEntry(sym); + } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) { + in.got->addEntry(sym); + } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) { + if (needsBinding(sym)) + in.tlvPointers->addEntry(sym); + } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) { + // References from thread-local variable sections are treated as offsets + // relative to the start of the referent section, and therefore have no + // need of rebase opcodes. + if (!(isThreadLocalVariables(isec->flags) && isa(sym))) + addNonLazyBindingEntries(sym, isec, r.offset, r.addend); + } +} + void Writer::scanRelocations() { for (InputSection *isec : inputSections) { // We do not wish to add rebase opcodes for __LD,__compact_unwind, because @@ -409,13 +432,17 @@ // before Writer runs might be cleaner... if (isec->segname == segment_names::ld) continue; + if (isec->name == section_names::ehFrame) + continue; for (Reloc &r : isec->relocs) { - if (auto *s = r.referent.dyn_cast()) { - if (isa(s)) - treatUndefinedSymbol(toString(*s), toString(isec->file)); - else - target->prepareSymbolRelocation(s, isec, r); + if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) + continue; + if (auto *sym = r.referent.dyn_cast()) { + if (isa(sym)) + treatUndefinedSymbol(toString(*sym), toString(isec->file)); + else if (target->validateSymbolRelocation(sym, isec, r)) + prepareSymbolRelocation(sym, isec, r); } else { assert(r.referent.is()); if (!r.pcrel) @@ -614,7 +641,7 @@ uint32_t sectionIndex = 0; for (OutputSegment *seg : outputSegments) { seg->sortOutputSections(compareByOrder(sectionOrder)); - for (auto *osec : seg->getSections()) { + for (OutputSection *osec : seg->getSections()) { // Now that the output sections are sorted, assign the final // output section indices. if (!osec->isHidden()) @@ -693,7 +720,7 @@ fileOff = alignTo(fileOff, PageSize); seg->fileOff = fileOff; - for (auto *osec : seg->getSections()) { + for (OutputSection *osec : seg->getSections()) { if (!osec->isNeeded()) continue; addr = alignTo(addr, osec->align); diff --git a/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s b/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s --- a/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s +++ b/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s @@ -8,7 +8,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o # RUN: not %lld -lSystem -L%t -ltlv -o /dev/null %t/test.o 2>&1 | FileCheck %s -DFILE=%t/test.o -# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text) +# CHECK: error: GOT_LOAD relocation requires that variable not be thread-local for `_foo' in [[FILE]]:(__text) #--- libtlv.s .section __DATA,__thread_vars,thread_local_variables diff --git a/lld/test/MachO/invalid/bad-got-to-tlv-reference.s b/lld/test/MachO/invalid/bad-got-to-tlv-reference.s --- a/lld/test/MachO/invalid/bad-got-to-tlv-reference.s +++ b/lld/test/MachO/invalid/bad-got-to-tlv-reference.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o -# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text) +# CHECK: error: GOT_LOAD relocation requires that variable not be thread-local for `_foo' in [[FILE]]:(__text) .text .globl _main diff --git a/lld/test/MachO/invalid/bad-tlv-def.s b/lld/test/MachO/invalid/bad-tlv-def.s --- a/lld/test/MachO/invalid/bad-tlv-def.s +++ b/lld/test/MachO/invalid/bad-tlv-def.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -# CHECK: error: relocations in thread-local variable sections must be X86_64_RELOC_UNSIGNED +# CHECK: error: GOT_LOAD relocation not allowed in thread-local section, must be UNSIGNED .text .globl _main diff --git a/lld/test/MachO/invalid/bad-tlv-opcode.s b/lld/test/MachO/invalid/bad-tlv-opcode.s --- a/lld/test/MachO/invalid/bad-tlv-opcode.s +++ b/lld/test/MachO/invalid/bad-tlv-opcode.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -# CHECK: error: X86_64_RELOC_TLV must be used with movq instructions +# CHECK: error: TLV reloc requires MOVQ instruction .text .globl _main diff --git a/lld/test/MachO/invalid/bad-tlv-relocation.s b/lld/test/MachO/invalid/bad-tlv-relocation.s --- a/lld/test/MachO/invalid/bad-tlv-relocation.s +++ b/lld/test/MachO/invalid/bad-tlv-relocation.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o -# CHECK: error: found X86_64_RELOC_TLV referencing a non-thread-local variable in [[FILE]]:(__text) +# CHECK: TLV relocation requires that variable be thread-local for `_foo' in [[FILE]]:(__text) .text .globl _main diff --git a/lld/test/MachO/invalid/invalid-relocation-length.yaml b/lld/test/MachO/invalid/invalid-relocation-length.yaml --- a/lld/test/MachO/invalid/invalid-relocation-length.yaml +++ b/lld/test/MachO/invalid/invalid-relocation-length.yaml @@ -2,7 +2,7 @@ # RUN: yaml2obj %s -o %t.o # RUN: not %lld -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o # -# CHECK: error: invalid relocation at offset 1 of __TEXT,__text in [[FILE]]: relocations of type 0 must have r_length of 2 or 3 +# CHECK: error: UNSIGNED relocation has width 2 bytes, but must be 4 or 8 bytes at offset 1 of __TEXT,__text in [[FILE]] !mach-o FileHeader: diff --git a/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml b/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml --- a/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml +++ b/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml @@ -2,7 +2,7 @@ # RUN: yaml2obj %s -o %t.o # RUN: not %lld -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o # -# CHECK: error: invalid relocation at offset 1 of __TEXT,__text in [[FILE]]: relocations of type 0 must not be pcrel +# CHECK: error: UNSIGNED relocation must not be PC-relative at offset 1 of __TEXT,__text in [[FILE]] !mach-o FileHeader: diff --git a/lld/test/MachO/x86-64-reloc-subtract.s b/lld/test/MachO/x86-64-reloc-subtract.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/x86-64-reloc-subtract.s @@ -0,0 +1,33 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: %lld -o %t %t.o +# RUN: llvm-objdump --syms --full-contents %t | FileCheck %s + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK: {{0*}}[[#%x, SUB1ADDR:]] g {{.*}} __DATA,subby _sub1 +# CHECK: {{0*}}[[#%x, SUB2ADDR:]] g {{.*}} __DATA,subby _sub2 +# CHECK-LABEL: Contents of section __DATA,subby: +# CHECK: [[#SUB1ADDR]] 10000000 +# CHECK: [[#SUB2ADDR]] f0ffffff + +.globl _main, _sub1, _sub2 + +.section __DATA,subby +L_.subtrahend_1: + .space 16 +L_.minuend_1: + .space 16 +L_.minuend_2: + .space 16 +L_.subtrahend_2: + .space 16 +_sub1: + .long L_.minuend_1 - L_.subtrahend_1 + .space 12 +_sub2: + .long L_.minuend_2 - L_.subtrahend_2 + +.text +_main: + mov $0, %rax + ret diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h --- a/llvm/include/llvm/BinaryFormat/MachO.h +++ b/llvm/include/llvm/BinaryFormat/MachO.h @@ -399,6 +399,7 @@ // Constant values for the r_type field in an // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info // structure. + GENERIC_RELOC_INVALID = 0xff, GENERIC_RELOC_VANILLA = 0, GENERIC_RELOC_PAIR = 1, GENERIC_RELOC_SECTDIFF = 2,