diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h --- a/lld/MachO/Arch/ARM64Common.h +++ b/lld/MachO/Arch/ARM64Common.h @@ -40,16 +40,18 @@ // | | imm26 | // +-----------+---------------------------------------------------+ -inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) { - checkInt(r, va, 28); +inline void encodeBranch26(uint32_t *loc, const Reloc &r, uint32_t base, + uint64_t va) { + checkInt(loc, r, va, 28); // Since branch destinations are 4-byte aligned, the 2 least- // significant bits are 0. They are right shifted off the end. - return (base | bitField(va, 2, 26, 0)); + llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0)); } -inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) { - checkInt(d, va, 28); - return (base | bitField(va, 2, 26, 0)); +inline void encodeBranch26(uint32_t *loc, SymbolDiagnostic d, uint32_t base, + uint64_t va) { + checkInt(loc, d, va, 28); + llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0)); } // 30 29 23 5 @@ -57,14 +59,18 @@ // | |ilo| | immhi | | // +-+---+---------+-------------------------------------+---------+ -inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) { - checkInt(r, va, 35); - return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5)); +inline void encodePage21(uint32_t *loc, const Reloc &r, uint32_t base, + uint64_t va) { + checkInt(loc, r, va, 35); + llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) | + bitField(va, 14, 19, 5)); } -inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) { - checkInt(d, va, 35); - return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5)); +inline void encodePage21(uint32_t *loc, SymbolDiagnostic d, uint32_t base, + uint64_t va) { + checkInt(loc, d, va, 35); + llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) | + bitField(va, 14, 19, 5)); } // 21 10 @@ -72,7 +78,7 @@ // | | 
imm12 | | // +-------------------+-----------------------+-------------------+ -inline uint64_t encodePageOff12(uint32_t base, uint64_t va) { +inline void encodePageOff12(uint32_t *loc, uint32_t base, uint64_t va) { int scale = 0; if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store scale = base >> 30; @@ -82,7 +88,8 @@ // TODO(gkm): extract embedded addend and warn if != 0 // uint64_t addend = ((base & 0x003FFC00) >> 10); - return (base | bitField(va, scale, 12 - scale, 10)); + llvm::support::endian::write32le(loc, + base | bitField(va, scale, 12 - scale, 10)); } inline uint64_t pageBits(uint64_t address) { @@ -99,9 +106,9 @@ pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize); uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * LP::wordSize; - buf32[0] = encodePage21({&sym, "stub"}, stubCode[0], - pageBits(lazyPointerVA) - pcPageBits); - buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA); + encodePage21(&buf32[0], {&sym, "stub"}, stubCode[0], + pageBits(lazyPointerVA) - pcPageBits); + encodePageOff12(&buf32[1], stubCode[1], lazyPointerVA); buf32[2] = stubCode[2]; } @@ -114,15 +121,15 @@ }; uint64_t loaderVA = in.imageLoaderCache->getVA(); SymbolDiagnostic d = {nullptr, "stub header helper"}; - buf32[0] = encodePage21(d, stubHelperHeaderCode[0], - pageBits(loaderVA) - pcPageBits(0)); - buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA); + encodePage21(&buf32[0], d, stubHelperHeaderCode[0], + pageBits(loaderVA) - pcPageBits(0)); + encodePageOff12(&buf32[1], stubHelperHeaderCode[1], loaderVA); buf32[2] = stubHelperHeaderCode[2]; uint64_t binderVA = in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize; - buf32[3] = encodePage21(d, stubHelperHeaderCode[3], - pageBits(binderVA) - pcPageBits(3)); - buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA); + encodePage21(&buf32[3], d, stubHelperHeaderCode[3], + pageBits(binderVA) - pcPageBits(3)); + encodePageOff12(&buf32[4], stubHelperHeaderCode[4], binderVA); 
buf32[5] = stubHelperHeaderCode[5]; } @@ -133,8 +140,8 @@ auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); }; uint64_t stubHelperHeaderVA = in.stubHelper->addr; buf32[0] = stubHelperEntryCode[0]; - buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1], - stubHelperHeaderVA - pcVA(1)); + encodeBranch26(&buf32[1], {&sym, "stub helper"}, stubHelperEntryCode[1], + stubHelperHeaderVA - pcVA(1)); buf32[2] = sym.lazyBindOffset; } diff --git a/lld/MachO/Arch/ARM64Common.cpp b/lld/MachO/Arch/ARM64Common.cpp --- a/lld/MachO/Arch/ARM64Common.cpp +++ b/lld/MachO/Arch/ARM64Common.cpp @@ -38,56 +38,57 @@ } } +static void writeValue(uint8_t *loc, const Reloc &r, uint64_t value) { + switch (r.length) { + case 2: + checkInt(loc, r, value, 32); + write32le(loc, value); + break; + case 3: + write64le(loc, value); + break; + default: + llvm_unreachable("invalid r_length"); + } +} + // For instruction relocations (load, store, add), the base // instruction is pre-populated in the text section. A pre-populated // instruction has opcode & register-operand bits set, with immediate // operands zeroed. We read it from text, OR-in the immediate // operands, then write-back the completed instruction. - void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value, uint64_t pc) const { + auto loc32 = reinterpret_cast<uint32_t *>(loc); uint32_t base = ((r.length == 2) ? 
read32le(loc) : 0); switch (r.type) { case ARM64_RELOC_BRANCH26: - value = encodeBranch26(r, base, value - pc); + encodeBranch26(loc32, r, base, value - pc); break; case ARM64_RELOC_SUBTRACTOR: case ARM64_RELOC_UNSIGNED: - if (r.length == 2) - checkInt(r, value, 32); + writeValue(loc, r, value); break; case ARM64_RELOC_POINTER_TO_GOT: if (r.pcrel) value -= pc; - checkInt(r, value, 32); + writeValue(loc, r, value); break; case ARM64_RELOC_PAGE21: case ARM64_RELOC_GOT_LOAD_PAGE21: - case ARM64_RELOC_TLVP_LOAD_PAGE21: { + case ARM64_RELOC_TLVP_LOAD_PAGE21: assert(r.pcrel); - value = encodePage21(r, base, pageBits(value) - pageBits(pc)); + encodePage21(loc32, r, base, pageBits(value) - pageBits(pc)); break; - } case ARM64_RELOC_PAGEOFF12: case ARM64_RELOC_GOT_LOAD_PAGEOFF12: case ARM64_RELOC_TLVP_LOAD_PAGEOFF12: assert(!r.pcrel); - value = encodePageOff12(base, value); + encodePageOff12(loc32, base, value); break; default: llvm_unreachable("unexpected relocation type"); } - - switch (r.length) { - case 2: - write32le(loc, value); - break; - case 3: - write64le(loc, value); - break; - default: - llvm_unreachable("invalid r_length"); - } } void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const { diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -102,9 +102,9 @@ switch (r.length) { case 2: if (r.type == X86_64_RELOC_UNSIGNED) - checkUInt(r, value, 32); + checkUInt(loc, r, value, 32); else - checkInt(r, value, 32); + checkInt(loc, r, value, 32); write32le(loc, value); break; case 3: @@ -127,7 +127,7 @@ static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr, uint64_t bufOff, uint64_t destAddr) { uint64_t rip = bufAddr + bufOff; - checkInt(d, destAddr - rip, 32); + checkInt(buf, d, destAddr - rip, 32); // For the instructions we care about, the RIP-relative address is always // stored in the last 4 bytes of the instruction. 
write32le(buf + bufOff - 4, destAddr - rip); diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -70,28 +70,28 @@ * v: The value the relocation is attempting to encode * bits: The number of bits actually available to encode this relocation */ -void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits, - int64_t min, uint64_t max); +void reportRangeError(void *loc, const Reloc &, const llvm::Twine &v, + uint8_t bits, int64_t min, uint64_t max); struct SymbolDiagnostic { const Symbol *symbol; llvm::StringRef reason; }; -void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits, - int64_t min, uint64_t max); +void reportRangeError(void *loc, SymbolDiagnostic, const llvm::Twine &v, + uint8_t bits, int64_t min, uint64_t max); template <typename Diagnostic> -inline void checkInt(Diagnostic d, int64_t v, int bits) { +inline void checkInt(void *loc, Diagnostic d, int64_t v, int bits) { if (v != llvm::SignExtend64(v, bits)) - reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits), + reportRangeError(loc, d, llvm::Twine(v), bits, llvm::minIntN(bits), llvm::maxIntN(bits)); } template <typename Diagnostic> -inline void checkUInt(Diagnostic d, uint64_t v, int bits) { +inline void checkUInt(void *loc, Diagnostic d, uint64_t v, int bits) { if ((v >> bits) != 0) - reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits)); + reportRangeError(loc, d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits)); } inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) { diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp --- a/lld/MachO/Relocations.cpp +++ b/lld/MachO/Relocations.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Relocations.h" +#include "ConcatOutputSection.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -38,19 +39,65 @@ return valid; } -void macho::reportRangeError(const Reloc &r, 
const Twine &v, uint8_t bits, - int64_t min, uint64_t max) { +// Given an offset in the output buffer, figure out which ConcatInputSection (if +// any) maps to it. At the same time, update the offset such that it is relative +// to the InputSection rather than to the output buffer. +// +// Obtaining the InputSection allows us to have better error diagnostics. +// However, many of our relocation-handling methods do not take the InputSection +// as a parameter. Since we are already passing the buffer offsets to our Target +// methods, this function allows us to emit better errors without threading an +// additional InputSection argument through the call stack. +// +// This is implemented as a slow linear search through OutputSegments, +// OutputSections, and finally the InputSections themselves. However, this +// function should be called only on error paths, so some overhead is fine. +static InputSection *offsetToInputSection(uint64_t *off) { + for (OutputSegment *seg : outputSegments) { + if (*off < seg->fileOff || *off >= seg->fileOff + seg->fileSize) + continue; + + const std::vector<OutputSection *> &sections = seg->getSections(); + size_t osecIdx = 0; + for (; osecIdx < sections.size(); ++osecIdx) + if (*off < sections[osecIdx]->fileOff) + break; + assert(osecIdx > 0); + // We should be only calling this function on offsets that belong to + // ConcatOutputSections. 
+ auto *osec = cast<ConcatOutputSection>(sections[osecIdx - 1]); + *off -= osec->fileOff; + + size_t isecIdx = 0; + for (; isecIdx < osec->inputs.size(); ++isecIdx) { + const ConcatInputSection *isec = osec->inputs[isecIdx]; + if (*off < isec->outSecOff) + break; + } + assert(isecIdx > 0); + ConcatInputSection *isec = osec->inputs[isecIdx - 1]; + *off -= isec->outSecOff; + return isec; + } + return nullptr; +} + +void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v, + uint8_t bits, int64_t min, uint64_t max) { std::string hint; + uint64_t off = reinterpret_cast<const uint8_t *>(loc) - in.bufferStart; + const InputSection *isec = offsetToInputSection(&off); + std::string locStr = isec ? isec->getLocation(off) : "(invalid location)"; if (auto *sym = r.referent.dyn_cast<Symbol *>()) hint = "; references " + toString(*sym); - // TODO: get location of reloc using something like LLD-ELF's getErrorPlace() - error("relocation " + target->getRelocAttrs(r.type).name + + error(locStr + ": relocation " + target->getRelocAttrs(r.type).name + " is out of range: " + v + " is not in [" + Twine(min) + ", " + Twine(max) + "]" + hint); } -void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits, - int64_t min, uint64_t max) { +void macho::reportRangeError(void *loc, SymbolDiagnostic d, const Twine &v, + uint8_t bits, int64_t min, uint64_t max) { + // FIXME: should we use `loc` somehow to provide a better error message? 
std::string hint; if (d.symbol) hint = "; references " + toString(*d.symbol); diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -595,6 +595,7 @@ }; struct InStruct { + const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; CStringSection *cStringSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1048,10 +1048,10 @@ FileOutputBuffer::F_executable); if (!bufferOrErr) - error("failed to open " + config->outputFile + ": " + + fatal("failed to open " + config->outputFile + ": " + llvm::toString(bufferOrErr.takeError())); - else - buffer = std::move(*bufferOrErr); + buffer = std::move(*bufferOrErr); + in.bufferStart = buffer->getBufferStart(); } void Writer::writeSections() { diff --git a/lld/test/MachO/invalid/range-check.s b/lld/test/MachO/invalid/range-check.s --- a/lld/test/MachO/invalid/range-check.s +++ b/lld/test/MachO/invalid/range-check.s @@ -6,8 +6,8 @@ # RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib # RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s -# CHECK: error: relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo -# CHECK: error: relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo +# CHECK: error: {{.*}}test.o:(symbol _main+0xd): relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo +# CHECK: error: {{.*}}test.o:(symbol _main+0x3): relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo # CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar # CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647] # CHECK: error: stub helper header is out of range: 
[[#]] is not in [-2147483648, 2147483647]