diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp new file mode 100644 --- /dev/null +++ b/lld/ELF/Arch/LoongArch.cpp @@ -0,0 +1,683 @@ +//===- LoongArch.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "OutputSections.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +// The LoongArch `pcalau12i` is behaviorally equivalent to the AArch64 `adrp`, +// so a similar concept of "page" also applies here. A "page" is in fact just +// another way to refer to the 12-bit range allowed by the immediate field of +// the addi/ld/st instructions. +// +// Note: "pcalau12i" stands for something like "PC aligned add upper, from 12th +// bit, immediate"; LoongArch instructions have no canonical full names, +// neither English nor Chinese ones, according to the ISA manual. +static uint64_t getLoongArchPage(uint64_t p) { + return p & ~static_cast<uint64_t>(0xFFF); +} + +// Calculate the adjusted page offset between dest and PC. +// +// We must specially handle the cases when the low 12 bits of dest are seen +// as negative, because the instructions consuming it (ld, st, addi, etc.) +// all sign-extend the immediate, unlike AArch64. 
The higher bits need +// tweaking too, due to potential usage in patterns like: +// +// pcalau12i A, %foo_hi20(sym) +// addi.d T, zero, %foo_lo12(sym) +// lu32i.d T, %foo64_lo20(sym) +// lu52i.d T, T, %foo64_hi12(sym) +// ldx.d A, A, T +// +// in which case the "pc + hi20" part, which pcalau12i sign-extends from bit +// 31, is built separately from the rest (upper 32 bits and sign-extended +// lo12), so the upper 32 bits must compensate for both sign extensions. +uint64_t elf::getLoongArchPageOffset(uint64_t dest, uint64_t pc) { + uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); + if ((dest & 0xfff) > 0x7ff) + result += 0x1000 - 0x100000000; + if (result & 0x80000000) + result += 0x100000000; + return result; +} + +namespace { + +class LoongArch final : public TargetInfo { +public: + LoongArch(); + uint32_t calcEFlags() const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; + void writeGotHeader(uint8_t *buf) const override; + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; + void writePltHeader(uint8_t *buf) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + RelType getDynRel(RelType type) const override; + RelExpr getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const override; + bool usesOnlyLowPageBits(RelType type) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; +}; + +} // end anonymous namespace + +const uint64_t dtpOffset = 0; + +enum Op { + SUB_W = 0x00110000, + SUB_D = 0x00118000, + BREAK = 0x002a0000, + SRLI_W = 0x00448000, + SRLI_D = 0x00450000, + ADDI_W = 0x02800000, + ADDI_D = 0x02c00000, + ANDI = 0x03400000, + PCADDU12I = 0x1c000000, + LD_W = 0x28800000, + LD_D = 0x28c00000, + JIRL = 0x4c000000, +}; + +enum Reg { + R_ZERO = 0, + R_RA = 1, + R_TP = 2, + R_T0 = 12, + R_T1 = 13, + R_T2 = 14, + R_T3 = 15, +}; + +static uint32_t 
hi20(uint32_t val) { return (val + 0x800) >> 12; } +static uint32_t lo12(uint32_t val) { return val & 0xfff; } + +static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { + return op | d | (j << 5) | (k << 10); +} + +// Extract bits v[begin:end], where range is inclusive. +static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { + return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; +} + +static uint32_t setD5k16(uint32_t insn, uint32_t imm) { + uint32_t immLo = extractBits(imm, 15, 0); + uint32_t immHi = extractBits(imm, 20, 16); + return (insn & 0xfc0003e0) | (immLo << 10) | immHi; +} + +static uint32_t setD10k16(uint32_t insn, uint32_t imm) { + uint32_t immLo = extractBits(imm, 15, 0); + uint32_t immHi = extractBits(imm, 25, 16); + return (insn & 0xfc000000) | (immLo << 10) | immHi; +} + +static uint32_t setJ20(uint32_t insn, uint32_t imm) { + return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); +} + +static uint32_t setK12(uint32_t insn, uint32_t imm) { + return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); +} + +static uint32_t setK16(uint32_t insn, uint32_t imm) { + return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); +} + +static bool isJirl(uint32_t insn) { + return (insn & 0xfc000000) == JIRL; +} + +LoongArch::LoongArch() { + // The LoongArch ISA itself does not have a limit on page sizes. According to + // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is + // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to + // "unlimited". + // However, practically the maximum usable page size is constrained by the + // kernel implementation, and 64KiB is the biggest non-huge page size + // supported by Linux as of v6.1. 
+ defaultMaxPageSize = 65536; + write32(trapInstr.data(), BREAK); // break 0 + + copyRel = R_LARCH_COPY; + pltRel = R_LARCH_JUMP_SLOT; + relativeRel = R_LARCH_RELATIVE; + iRelativeRel = R_LARCH_IRELATIVE; + + if (config->is64) { + symbolicRel = R_LARCH_64; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; + tlsOffsetRel = R_LARCH_TLS_DTPREL64; + tlsGotRel = R_LARCH_TLS_TPREL64; + } else { + symbolicRel = R_LARCH_32; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; + tlsOffsetRel = R_LARCH_TLS_DTPREL32; + tlsGotRel = R_LARCH_TLS_TPREL32; + } + + gotRel = symbolicRel; + + // .got[0] = _DYNAMIC + gotHeaderEntriesNum = 1; + + // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map + gotPltHeaderEntriesNum = 2; + + pltHeaderSize = 32; + pltEntrySize = 16; + ipltEntrySize = 16; +} + +static uint32_t getEFlags(InputFile *f) { + if (config->is64) + return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags; + return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags; +} + +uint32_t LoongArch::calcEFlags() const { + // If there are only binary input files (from -b binary), use a + // value of 0 for the ELF header flags. + if (ctx.objectFiles.empty()) + return 0; + + uint32_t target = getEFlags(ctx.objectFiles.front()); + + for (InputFile *f : ctx.objectFiles) { + if ((getEFlags(f) & EF_LOONGARCH_ABI_MODIFIER_MASK) != + (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) + error(toString(f) + + ": cannot link object files with different ABI"); + + // We cannot process psABI v1.x / object ABI v0 files (containing stack + // relocations), unlike ld.bfd. + // + // Instead of blindly accepting every v0 object and only failing at + // relocation processing time, just disallow interlink altogether. We + // don't expect significant usage of object ABI v0 in the wild (the old + // world may continue using object ABI v0 for a while, but as it's not + // binary-compatible with the upstream i.e. new-world ecosystem, it's not + // being considered here). 
+ // + // There are briefly some new-world systems with object ABI v0 binaries too. + // It is because these programs were built before v1 was finalized. + // These are not supported either due to the extremely small number of them, + // and the few impacted users are advised to simply rebuild world or + // reinstall a recent system. + if ((getEFlags(f) & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) + error(toString(f) + ": unsupported object file ABI version"); + } + + return target; +} + +int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + case R_LARCH_32: + case R_LARCH_TLS_DTPMOD32: + case R_LARCH_TLS_DTPREL32: + case R_LARCH_TLS_TPREL32: + return SignExtend64<32>(read32le(buf)); + case R_LARCH_64: + case R_LARCH_TLS_DTPMOD64: + case R_LARCH_TLS_DTPREL64: + case R_LARCH_TLS_TPREL64: + return read64le(buf); + case R_LARCH_RELATIVE: + case R_LARCH_IRELATIVE: + return config->is64 ? read64le(buf) : read32le(buf); + case R_LARCH_NONE: + case R_LARCH_JUMP_SLOT: + // These relocations are defined as not having an implicit addend. + return 0; + } +} + +void LoongArch::writeGotHeader(uint8_t *buf) const { + if (config->is64) + write64le(buf, mainPart->dynamic->getVA()); + else + write32le(buf, mainPart->dynamic->getVA()); +} + +void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { + if (config->is64) + write64le(buf, in.plt->getVA()); + else + write32le(buf, in.plt->getVA()); +} + +void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { + if (config->writeAddends) { + if (config->is64) + write64le(buf, s.getVA()); + else + write32le(buf, s.getVA()); + } +} + +void LoongArch::writePltHeader(uint8_t *buf) const { + // The LoongArch PLT is currently structured just like that of RISCV. 
+ // Annoyingly, this means the PLT is still using `pcaddu12i` to perform + // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), + // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that + // is used everywhere else involving PC-relative operations in the LoongArch + // ELF psABI v2.00. + // The `pcrel_{hi20,lo12}` operators are illustrative only and not really + // supported by LoongArch assemblers. + // + // 1: pcaddu12i $t2, %pcrel_hi20(.got.plt) + // sub.[wd] $t1, $t1, $t3 + // ld.[wd] $t3, $t2, %pcrel_lo12(1b) ; t3 = _dl_runtime_resolve + // addi.[wd] $t1, $t1, -pltHeaderSize-12; t1 = &.plt[i] - &.plt[0] + // addi.[wd] $t0, $t2, %pcrel_lo12(1b) + // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] + // ld.[wd] $t0, $t0, Wordsize(t0) ; t0 = link_map + // jr $t3 + uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); + uint32_t sub = config->is64 ? SUB_D : SUB_W; + uint32_t ld = config->is64 ? LD_D : LD_W; + uint32_t addi = config->is64 ? ADDI_D : ADDI_W; + uint32_t srli = config->is64 ? SRLI_D : SRLI_W; + write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); + write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); + write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); + write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); + write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); + write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); + write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); + write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); +} + +void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + // See the comment in writePltHeader for reason why pcaddu12i is used instead + // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. 
+ // + // 1: pcaddu12i $t3, %pcrel_hi20(f@.got.plt) + // ld.[wd] $t3, $t3, %pcrel_lo12(1b) + // jirl $t1, $t3, 0 + // nop + uint32_t offset = sym.getGotPltVA() - pltEntryAddr; + write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); + write32le(buf + 4, + insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); + write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); + write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); +} + +RelType LoongArch::getDynRel(RelType type) const { + return type == target->symbolicRel ? type + : static_cast<RelType>(R_LARCH_NONE); +} + +RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + const uint8_t *loc) const { + switch (type) { + case R_LARCH_NONE: + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: + return R_NONE; + case R_LARCH_32: + case R_LARCH_64: + case R_LARCH_ABS_HI20: + case R_LARCH_ABS_LO12: + case R_LARCH_ABS64_LO20: + case R_LARCH_ABS64_HI12: + return R_ABS; + case R_LARCH_PCALA_LO12: + // This may well be just R_ABS and nothing more, but unfortunately some + // people had the brilliant idea of reusing the R_LARCH_PCALA_LO12 reloc on + // JIRLs, for implementing the medium code model, seemingly for avoiding + // having to add another reloc type just for that. The intended use case is + // for making the libc_nonshared.a of glibc 2.37+ usable with large + // applications like Firefox or Chromium, where an R_LARCH_B26 will overflow + // otherwise. See [1] for details. + // + // This is bad for multiple reasons: + // + // * R_LARCH_PCALA_LO12 just places its computed value unmodified, but JIRL + // expects its immediate to be pre-shifted by 2 bits, making the reloc + // behavior dependent on the underlying input section content; + // * R_LARCH_PCALA_LO12 leaves the upper 16-12+2=6 bits of JIRL's immediate + // operand unusable, making the usage forever a kludge: a proper reloc + // type has to be added *anyway*, to be able to leverage the PCADDU18I + + // JIRL combo, for example. 
+ // (In this case an ISA revision (!) is arguably necessary, because there + // is not a PCALAU18I for stylistic consistency with the other psABI v2.00 + // relocs. If PCADDU18I is to be used anyway, that would mean the new + // relocs are PCREL-style instead of PCALA; so for consistency, PCREL + // relocs for the other psABI v2.00 relocs should be added as well...) + // * Inspection into the input section must be performed for every + // R_LARCH_PCALA_LO12 record, which is one of the most frequently used + // LoongArch reloc types, but usage of the medium code model is not nearly + // as popular, so arguably ~100% of the checks are in fact wasted for very + // little gain. + // + // The problem is long known, and already reported with a suggested fix + // back in September 2022 [2]. But the pull request has not received any + // comment since then, and object code with this usage will soon appear with + // the release of glibc 2.37, so we have no choice but to carry the same + // kind of workaround as the BFD linker does. + // + // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a + // [2]: https://github.com/loongson/LoongArch-Documentation/pull/69 + return isJirl(read32le(loc)) ? R_PLT : R_ABS; + case R_LARCH_TLS_DTPREL32: + case R_LARCH_TLS_DTPREL64: + return R_DTPREL; + case R_LARCH_TLS_TPREL32: + case R_LARCH_TLS_TPREL64: + case R_LARCH_TLS_LE_HI20: + case R_LARCH_TLS_LE_LO12: + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_LE64_HI12: + return R_TPREL; + case R_LARCH_ADD8: + case R_LARCH_ADD16: + case R_LARCH_ADD32: + case R_LARCH_ADD64: + case R_LARCH_SUB8: + case R_LARCH_SUB16: + case R_LARCH_SUB32: + case R_LARCH_SUB64: + // The LoongArch add/sub relocs are meant to behave the same as the RISCV + // counterparts; reuse its RelExpr to avoid having to define one more + // RelExpr type just to duplicate everything afterwards. 
+ return R_RISCV_ADD; + case R_LARCH_32_PCREL: + return R_PC; + case R_LARCH_B16: + case R_LARCH_B21: + case R_LARCH_B26: + return R_PLT_PC; + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_GOT64_PC_HI12: + return R_LOONGARCH_GOT_PAGE_PC; + case R_LARCH_GOT_PC_LO12: + return R_LOONGARCH_GOT; + case R_LARCH_TLS_LD_PC_HI20: + case R_LARCH_TLS_GD_PC_HI20: + return R_LOONGARCH_GOT_TLSGD_PAGE_PC; + case R_LARCH_TLS_IE_PC_HI20: + case R_LARCH_TLS_IE64_PC_LO20: + case R_LARCH_TLS_IE64_PC_HI12: + return R_LOONGARCH_GOT_TLSIE_PAGE_PC; + case R_LARCH_TLS_IE_PC_LO12: + return R_LOONGARCH_GOT_TLSIE; + case R_LARCH_PCALA_HI20: + // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT + // anyway so why waste time checking only to get everything relaxed back to + // it? + // + // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want + // both the HI20 and LO12 to potentially refer to the PLT. But in reality + // the HI20 reloc appears earlier, and the relocs don't contain enough + // information to let us properly resolve semantics per symbol. + // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 + // relocs, hence it is nearly impossible to 100% accurately determine each + // HI20's "flavor" without taking big performance hits, in the presence of + // edge cases (e.g. HI20 without pairing LO12, paired LO12 placed so far + // apart that relationship is not certain anymore), and programmer mistakes + // (e.g. as outlined in https://github.com/loongson/LoongArch-Documentation/pull/69). + // + // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark + // every HI20 reloc referring to the same symbol differently; this is not + // feasible with the current function signature of getRelExpr that doesn't + // allow for such inter-pass state. 
+ // + // So, unfortunately we have to again workaround this quirk the same way as + // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT needing, only + // relaxing to R_LOONGARCH_PAGE_PC if it's known not so at a later stage. + return R_LOONGARCH_PLT_PAGE_PC; + case R_LARCH_PCALA64_LO20: + case R_LARCH_PCALA64_HI12: + return R_LOONGARCH_PAGE_PC; + case R_LARCH_GOT_HI20: + case R_LARCH_GOT_LO12: + case R_LARCH_GOT64_LO20: + case R_LARCH_GOT64_HI12: + case R_LARCH_TLS_IE_HI20: + case R_LARCH_TLS_IE_LO12: + case R_LARCH_TLS_IE64_LO20: + case R_LARCH_TLS_IE64_HI12: + return R_GOT; + case R_LARCH_TLS_LD_HI20: + return R_TLSLD_GOT; + case R_LARCH_TLS_GD_HI20: + return R_TLSGD_GOT; + case R_LARCH_RELAX: + // LoongArch linker relaxation is not defined yet. + return R_NONE; + + // A stack machine (read: global mutable state) is necessary for properly + // computing these relocs, and these relocs are already deprecated after + // the release of LoongArch ELF psABI v2.00, so we are not going to + // implement them. + case R_LARCH_SOP_PUSH_PCREL: + case R_LARCH_SOP_PUSH_ABSOLUTE: + case R_LARCH_SOP_PUSH_DUP: + case R_LARCH_SOP_PUSH_GPREL: + case R_LARCH_SOP_PUSH_TLS_TPREL: + case R_LARCH_SOP_PUSH_TLS_GOT: + case R_LARCH_SOP_PUSH_TLS_GD: + case R_LARCH_SOP_PUSH_PLT_PCREL: + case R_LARCH_SOP_ASSERT: + case R_LARCH_SOP_NOT: + case R_LARCH_SOP_SUB: + case R_LARCH_SOP_SL: + case R_LARCH_SOP_SR: + case R_LARCH_SOP_ADD: + case R_LARCH_SOP_AND: + case R_LARCH_SOP_IF_ELSE: + case R_LARCH_SOP_POP_32_S_10_5: + case R_LARCH_SOP_POP_32_U_10_12: + case R_LARCH_SOP_POP_32_S_10_12: + case R_LARCH_SOP_POP_32_S_10_16: + case R_LARCH_SOP_POP_32_S_10_16_S2: + case R_LARCH_SOP_POP_32_S_5_20: + case R_LARCH_SOP_POP_32_S_0_5_10_16_S2: + case R_LARCH_SOP_POP_32_S_0_10_10_16_S2: + case R_LARCH_SOP_POP_32_U: + // Nor are we implementing these two reloc types that were probably added + // without much thought, and already proposed to be removed. 
+ // See https://github.com/loongson/LoongArch-Documentation/issues/51 + case R_LARCH_ADD24: + case R_LARCH_SUB24: + // Similarly for these two, long deprecated and unused even before the + // inception of LoongArch. + case R_LARCH_GNU_VTINHERIT: + case R_LARCH_GNU_VTENTRY: + error(getErrorLocation(loc) + + "cannot handle deprecated relocation " + toString(type) + + " against symbol " + toString(s)); + return R_NONE; + + default: + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; + } +} + +bool LoongArch::usesOnlyLowPageBits(RelType type) const { + switch (type) { + default: + return false; + case R_LARCH_PCALA_LO12: + case R_LARCH_GOT_LO12: + case R_LARCH_GOT_PC_LO12: + case R_LARCH_TLS_IE_PC_LO12: + return true; + } +} + +void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { + case R_LARCH_32_PCREL: + checkInt(loc, val, 32, rel); + [[fallthrough]]; + case R_LARCH_32: + write32le(loc, val); + return; + case R_LARCH_64: + write64le(loc, val); + return; + + case R_LARCH_B16: { + checkInt(loc, val, 18, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setK16(read32le(loc), val >> 2)); + return; + } + + case R_LARCH_B21: { + checkInt(loc, val, 23, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setD5k16(read32le(loc), val >> 2)); + return; + } + + case R_LARCH_B26: { + checkInt(loc, val, 28, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setD10k16(read32le(loc), val >> 2)); + return; + } + + // Relocs intended for `addi`, `ld` or `st`. 
+ case R_LARCH_ABS_LO12: + case R_LARCH_PCALA_LO12: + case R_LARCH_GOT_PC_LO12: + case R_LARCH_GOT_LO12: + case R_LARCH_TLS_LE_LO12: + case R_LARCH_TLS_IE_PC_LO12: + case R_LARCH_TLS_IE_LO12: { + // Annoyingly, we have to again inspect the insn word to handle the + // R_LARCH_PCALA_LO12 on JIRL case: JIRL wants its immediate's 2 lowest + // zeroes removed by us, and the immediate slot width is in fact different. + // In this case, process like an R_LARCH_B16 without the overflow check, and + // only taking the value's lowest 12 bits. + if (rel.type == R_LARCH_PCALA_LO12 && isJirl(read32le(loc))) { + checkAlignment(loc, val, 4, rel); + val = SignExtend64<12>(val); + write32le(loc, setK16(read32le(loc), val >> 2)); + return; + } + write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); + return; + } + + // Relocs intended for `lu12i.w` or `pcalau12i`. + case R_LARCH_ABS_HI20: + case R_LARCH_PCALA_HI20: + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT_HI20: + case R_LARCH_TLS_LE_HI20: + case R_LARCH_TLS_IE_PC_HI20: + case R_LARCH_TLS_IE_HI20: + case R_LARCH_TLS_LD_PC_HI20: + case R_LARCH_TLS_LD_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD_HI20: { + write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); + return; + } + + // Relocs intended for `lu32i.d`. + case R_LARCH_ABS64_LO20: + case R_LARCH_PCALA64_LO20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_GOT64_LO20: + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_IE64_PC_LO20: + case R_LARCH_TLS_IE64_LO20: { + write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); + return; + } + + // Relocs intended for `lu52i.d`. 
+ case R_LARCH_ABS64_HI12: + case R_LARCH_PCALA64_HI12: + case R_LARCH_GOT64_PC_HI12: + case R_LARCH_GOT64_HI12: + case R_LARCH_TLS_LE64_HI12: + case R_LARCH_TLS_IE64_PC_HI12: + case R_LARCH_TLS_IE64_HI12: { + write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); + return; + } + + case R_LARCH_ADD8: + *loc += val; + return; + case R_LARCH_ADD16: + write16le(loc, read16le(loc) + val); + return; + case R_LARCH_ADD32: + write32le(loc, read32le(loc) + val); + return; + case R_LARCH_ADD64: + write64le(loc, read64le(loc) + val); + return; + case R_LARCH_SUB8: + *loc -= val; + return; + case R_LARCH_SUB16: + write16le(loc, read16le(loc) - val); + return; + case R_LARCH_SUB32: + write32le(loc, read32le(loc) - val); + return; + case R_LARCH_SUB64: + write64le(loc, read64le(loc) - val); + return; + + case R_LARCH_TLS_DTPREL32: + write32le(loc, val - dtpOffset); + return; + case R_LARCH_TLS_DTPREL64: + write64le(loc, val - dtpOffset); + return; + + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: + // no-op + return; + + case R_LARCH_RELAX: + return; // Ignored (for now) + + default: + llvm_unreachable("unknown relocation"); + } +} + +TargetInfo *elf::getLoongArchTargetInfo() { + static LoongArch target; + return &target; +} diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -25,6 +25,7 @@ Arch/ARM.cpp Arch/AVR.cpp Arch/Hexagon.cpp + Arch/LoongArch.cpp Arch/Mips.cpp Arch/MipsArchTree.cpp Arch/MSP430.cpp diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -167,6 +167,7 @@ .Case("elf32lriscv", {ELF32LEKind, EM_RISCV}) .Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC}) .Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC}) + .Case("elf32loongarch", {ELF32LEKind, EM_LOONGARCH}) .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) .Case("elf64lriscv", {ELF64LEKind, EM_RISCV}) @@ -178,6 +179,7 @@ 
.Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) .Case("msp430elf", {ELF32LEKind, EM_MSP430}) .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) + .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) @@ -1032,8 +1034,9 @@ // Otherwise use the psABI defined relocation entry format. uint16_t m = config->emachine; - return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC || - m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64; + return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || + m == EM_LOONGARCH || m == EM_PPC || m == EM_PPC64 || m == EM_RISCV || + m == EM_X86_64; } static void parseClangOption(StringRef opt, const Twine &msg) { @@ -1570,8 +1573,9 @@ // have support for reading Elf_Rel addends, so we only enable for a subset. #ifndef NDEBUG bool checkDynamicRelocsDefault = m == EM_AARCH64 || m == EM_ARM || - m == EM_386 || m == EM_MIPS || - m == EM_X86_64 || m == EM_RISCV; + m == EM_386 || m == EM_LOONGARCH || + m == EM_MIPS || m == EM_RISCV || + m == EM_X86_64; #else bool checkDynamicRelocsDefault = false; #endif diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1534,6 +1534,9 @@ return EM_AVR; case Triple::hexagon: return EM_HEXAGON; + case Triple::loongarch32: + case Triple::loongarch64: + return EM_LOONGARCH; case Triple::mips: case Triple::mipsel: case Triple::mips64: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/xxhash.h" #include +#include #include #include @@ -609,6 +610,7 @@ // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library // data and 0xf000 of the program's TLS segment. 
return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; + case EM_LOONGARCH: case EM_RISCV: return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); @@ -643,6 +645,23 @@ case R_GOT: case R_RELAX_TLS_GD_TO_IE_ABS: return sym.getGotVA() + a; + case R_LOONGARCH_GOT: + case R_LOONGARCH_GOT_TLSIE: + // The LoongArch PC-relative GOT relocs are notorious, in that the TLS + // relocs share the same reloc type with non-TLS ones for their page + // offsets. The arithmetic is different in the TLS case so we have to + // duplicate some logic here. + if (sym.isTls()) { + if (sym.hasFlag(NEEDS_TLSGD)) { + // Like R_LOONGARCH_GOT_TLSGD_PAGE_PC but taking the absolute value. + return in.got->getGlobalDynAddr(sym) + a; + } + if (ctx.needsTlsLd.load(std::memory_order_relaxed)) + // Like R_TLSLD_PC but taking the absolute value. + return in.got->getTlsIndexVA() + a; + // Fallthrough; relocate like R_GOT. + } + return sym.getGotVA() + a; case R_GOTONLY_PC: return in.got->getVA() + a - p; case R_GOTPLTONLY_PC: @@ -667,6 +686,9 @@ case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return sym.getGotVA() + a - p; + case R_LOONGARCH_GOT_PAGE_PC: + case R_LOONGARCH_GOT_TLSIE_PAGE_PC: + return getLoongArchPageOffset(sym.getGotVA() + a, p); case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: @@ -715,6 +737,8 @@ *hiRel->sym, hiRel->expr); return 0; } + case R_LOONGARCH_PAGE_PC: + return getLoongArchPageOffset(sym.getVA(a), p); case R_PC: case R_ARM_PCA: { uint64_t dest; @@ -748,6 +772,8 @@ case R_PLT_PC: case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; + case R_LOONGARCH_PLT_PAGE_PC: + return getLoongArchPageOffset(sym.getPltVA() + a, p); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); case R_PPC32_PLTREL: @@ -808,6 +834,8 @@ return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA(); case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; + case R_LOONGARCH_GOT_TLSGD_PAGE_PC: + return 
getLoongArchPageOffset(in.got->getGlobalDynAddr(sym) + a, p); case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -102,6 +102,17 @@ R_PPC64_RELAX_GOT_PC, R_RISCV_ADD, R_RISCV_PC_INDIRECT, + // Same as R_PC but with page-aligned semantics. + R_LOONGARCH_PAGE_PC, + // Same as R_PLT_PC but with page-aligned semantics. + R_LOONGARCH_PLT_PAGE_PC, + // LoongArch GOT relocs are also reused for TLS, so the required semantics + // differ from everyone else. + R_LOONGARCH_GOT, + R_LOONGARCH_GOT_TLSIE, + R_LOONGARCH_GOT_PAGE_PC, + R_LOONGARCH_GOT_TLSGD_PAGE_PC, + R_LOONGARCH_GOT_TLSIE_PAGE_PC, }; // Architecture-neutral representation of relocation. diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -190,8 +190,8 @@ // Returns true if Expr refers a PLT entry. static bool needsPlt(RelExpr expr) { - return oneof( - expr); + return oneof(expr); } // Returns true if Expr refers a GOT entry. 
Note that this function @@ -200,7 +200,9 @@ static bool needsGot(RelExpr expr) { return oneof(expr); + R_AARCH64_GOT_PAGE, R_LOONGARCH_GOT, R_LOONGARCH_GOT_TLSIE, + R_LOONGARCH_GOT_PAGE_PC, R_LOONGARCH_GOT_TLSGD_PAGE_PC, + R_LOONGARCH_GOT_TLSIE_PAGE_PC>(expr); } // True if this expression is of the form Sym - X, where X is a position in the @@ -208,12 +210,15 @@ static bool isRelExpr(RelExpr expr) { return oneof(expr); + R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC, + R_LOONGARCH_PAGE_PC>(expr); } static RelExpr toPlt(RelExpr expr) { switch (expr) { + case R_LOONGARCH_PAGE_PC: + return R_LOONGARCH_PLT_PAGE_PC; case R_PPC64_CALL: return R_PPC64_CALL_PLT; case R_PC: @@ -232,6 +237,8 @@ case R_PLT_PC: case R_PPC32_PLTREL: return R_PC; + case R_LOONGARCH_PLT_PAGE_PC: + return R_LOONGARCH_PAGE_PC; case R_PPC64_CALL_PLT: return R_PPC64_CALL; case R_PLT: @@ -946,7 +953,9 @@ R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, - R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) + R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, + R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, R_LOONGARCH_GOT_TLSIE, + R_LOONGARCH_GOT_PAGE_PC, R_LOONGARCH_GOT_TLSIE_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1082,7 +1091,8 @@ bool canWrite = (sec->flags & SHF_WRITE) || !config->zText; if (canWrite) { RelType rel = target->getDynRel(type); - if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { + if (oneof(expr) || + (rel == target->symbolicRel && !sym.isPreemptible)) { addRelativeReloc(*sec, offset, sym, addend, expr, type); return; } else if (rel != 0) { @@ -1234,11 +1244,13 @@ return 1; } - // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. 
For - // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable + // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE + // relaxation. + // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable // relaxation as well. bool toExecRelax = !config->shared && config->emachine != EM_ARM && config->emachine != EM_HEXAGON && + config->emachine != EM_LOONGARCH && config->emachine != EM_RISCV && !c.file->ppc64DisableTLSRelax; @@ -1255,8 +1267,7 @@ // being suitable for being dynamically loaded via dlopen. GOT[e0] is the // module index, with a special value of 0 for the current module. GOT[e1] is // unused. There only needs to be one module index entry. - if (oneof( - expr)) { + if (oneof(expr)) { // Local-Dynamic relocs can be relaxed to Local-Exec. if (toExecRelax) { c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, @@ -1287,7 +1298,8 @@ } if (oneof(expr)) { + R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, + R_LOONGARCH_GOT_TLSGD_PAGE_PC>(expr)) { if (!toExecRelax) { sym.setFlags(NEEDS_TLSGD); c.addReloc({expr, type, offset, addend, &sym}); @@ -1307,8 +1319,8 @@ return target->getTlsGdRelaxSkip(type); } - if (oneof(expr)) { + if (oneof(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally // defined. 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -438,6 +438,8 @@ .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) + .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) + .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) .Default({ELFNoneKind, EM_NONE}); } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -172,6 +172,7 @@ TargetInfo *getARMTargetInfo(); TargetInfo *getAVRTargetInfo(); TargetInfo *getHexagonTargetInfo(); +TargetInfo *getLoongArchTargetInfo(); TargetInfo *getMSP430TargetInfo(); TargetInfo *getPPC64TargetInfo(); TargetInfo *getPPCTargetInfo(); @@ -215,6 +216,7 @@ void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); +uint64_t getLoongArchPageOffset(uint64_t dest, uint64_t pc); void riscvFinalizeRelax(int passes); void mergeRISCVAttributesSections(); diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -62,6 +62,8 @@ return getAVRTargetInfo(); case EM_HEXAGON: return getHexagonTargetInfo(); + case EM_LOONGARCH: + return getLoongArchTargetInfo(); case EM_MIPS: switch (config->ekind) { case ELF32LEKind: diff --git a/lld/test/ELF/loongarch-abs64.s b/lld/test/ELF/loongarch-abs64.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-abs64.s @@ -0,0 +1,60 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la64.o --defsym foo=0 --defsym bar=42 -o %t.la64.1 +# RUN: llvm-objdump -d %t.la64.1 | FileCheck --check-prefix=CASE1 %s +# CASE1: 04 00 00 14 lu12i.w $a0, 0 +# CASE1-NEXT: 84 00 80 03 ori $a0, $a0, 0 +# CASE1-NEXT: 04 00 00 16 lu32i.d $a0, 0 +# CASE1-NEXT: 84 00 00 03 lu52i.d $a0, $a0, 0 +# CASE1-NEXT: 05 00 00 14 lu12i.w $a1, 0 +# CASE1-NEXT: a5 
a8 80 03 ori $a1, $a1, 42 +# CASE1-NEXT: 05 00 00 16 lu32i.d $a1, 0 +# CASE1-NEXT: a5 00 00 03 lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0x12345678 --defsym bar=0x87654321 -o %t.la64.2 +# RUN: llvm-objdump -d %t.la64.2 | FileCheck --check-prefix=CASE2 %s +# CASE2: a4 68 24 14 lu12i.w $a0, 74565 +# CASE2-NEXT: 84 e0 99 03 ori $a0, $a0, 1656 +# CASE2-NEXT: 04 00 00 16 lu32i.d $a0, 0 +# CASE2-NEXT: 84 00 00 03 lu52i.d $a0, $a0, 0 +# CASE2-NEXT: 85 ca 0e 15 lu12i.w $a1, -493996 +# CASE2-NEXT: a5 84 8c 03 ori $a1, $a1, 801 +# CASE2-NEXT: 05 00 00 16 lu32i.d $a1, 0 +# CASE2-NEXT: a5 00 00 03 lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0x12345fedcb678 --defsym bar=0xfedcb12345000 -o %t.la64.3 +# RUN: llvm-objdump -d %t.la64.3 | FileCheck --check-prefix=CASE3 %s +# CASE3: 64 b9 fd 15 lu12i.w $a0, -4661 +# CASE3-NEXT: 84 e0 99 03 ori $a0, $a0, 1656 +# CASE3-NEXT: a4 68 24 16 lu32i.d $a0, 74565 +# CASE3-NEXT: 84 00 00 03 lu52i.d $a0, $a0, 0 +# CASE3-NEXT: a5 68 24 14 lu12i.w $a1, 74565 +# CASE3-NEXT: a5 00 80 03 ori $a1, $a1, 0 +# CASE3-NEXT: 65 b9 fd 17 lu32i.d $a1, -4661 +# CASE3-NEXT: a5 00 00 03 lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0xfffffeeeeeddd --defsym bar=0xfff00000f1111222 -o %t.la64.4 +# RUN: llvm-objdump -d %t.la64.4 | FileCheck --check-prefix=CASE4 %s +# CASE4: c4 dd dd 15 lu12i.w $a0, -69906 +# CASE4-NEXT: 84 74 b7 03 ori $a0, $a0, 3549 +# CASE4-NEXT: e4 ff ff 17 lu32i.d $a0, -1 +# CASE4-NEXT: 84 00 00 03 lu52i.d $a0, $a0, 0 +# CASE4-NEXT: 25 22 e2 15 lu12i.w $a1, -61167 +# CASE4-NEXT: a5 88 88 03 ori $a1, $a1, 546 +# CASE4-NEXT: 05 00 00 16 lu32i.d $a1, 0 +# CASE4-NEXT: a5 fc 3f 03 lu52i.d $a1, $a1, -1 + +.global _start + +_start: + lu12i.w $a0, %abs_hi20(foo) + ori $a0, $a0, %abs_lo12(foo) + lu32i.d $a0, %abs64_lo20(foo) + lu52i.d $a0, $a0, %abs64_hi12(foo) + + lu12i.w $a1, %abs_hi20(bar) + ori $a1, $a1, %abs_lo12(bar) + lu32i.d $a1, %abs64_lo20(bar) + lu52i.d $a1, $a1, %abs64_hi12(bar) diff --git 
a/lld/test/ELF/loongarch-branch-b16.s b/lld/test/ELF/loongarch-branch-b16.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-branch-b16.s @@ -0,0 +1,40 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o --defsym foo=_start+4 --defsym bar=_start -o %t.la32 +# RUN: ld.lld %t.la64.o --defsym foo=_start+4 --defsym bar=_start -o %t.la64 +# RUN: llvm-objdump -d %t.la32 | FileCheck %s --check-prefix=CHECK-32 +# RUN: llvm-objdump -d %t.la64 | FileCheck %s --check-prefix=CHECK-64 +# CHECK-32: 00 04 00 58 beq $zero, $zero, 4 +# CHECK-32: 00 fc ff 5f bne $zero, $zero, -4 +# CHECK-64: 00 04 00 58 beq $zero, $zero, 4 +# CHECK-64: 00 fc ff 5f bne $zero, $zero, -4 + +# RUN: ld.lld %t.la32.o --defsym foo=_start+0x1fffc --defsym bar=_start+4-0x20000 -o %t.la32.limits +# RUN: ld.lld %t.la64.o --defsym foo=_start+0x1fffc --defsym bar=_start+4-0x20000 -o %t.la64.limits +# RUN: llvm-objdump -d %t.la32.limits | FileCheck --check-prefix=LIMITS-32 %s +# RUN: llvm-objdump -d %t.la64.limits | FileCheck --check-prefix=LIMITS-64 %s +# LIMITS-32: 00 fc ff 59 beq $zero, $zero, 131068 +# LIMITS-32-NEXT: 00 00 00 5e bne $zero, $zero, -131072 +# LIMITS-64: 00 fc ff 59 beq $zero, $zero, 131068 +# LIMITS-64-NEXT: 00 00 00 5e bne $zero, $zero, -131072 + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+0x20000 --defsym bar=_start+4-0x20004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+0x20000 --defsym bar=_start+4-0x20004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: relocation R_LARCH_B16 out of range: 131072 is not in [-131072, 131071]; references foo +# ERROR-RANGE: relocation R_LARCH_B16 out of range: -131076 is not in [-131072, 131071]; references bar + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+1 --defsym bar=_start-1 -o 
/dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+1 --defsym bar=_start-1 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# ERROR-ALIGN-1: improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# ERROR-ALIGN-2: improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes + +.global _start +_start: + beq $zero, $zero, foo + bne $zero, $zero, bar diff --git a/lld/test/ELF/loongarch-branch-b21.s b/lld/test/ELF/loongarch-branch-b21.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-branch-b21.s @@ -0,0 +1,40 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o --defsym foo=_start+4 --defsym bar=_start -o %t.la32 +# RUN: ld.lld %t.la64.o --defsym foo=_start+4 --defsym bar=_start -o %t.la64 +# RUN: llvm-objdump -d %t.la32 | FileCheck %s --check-prefix=CHECK-32 +# RUN: llvm-objdump -d %t.la64 | FileCheck %s --check-prefix=CHECK-64 +# CHECK-32: e0 07 00 40 beqz $s8, 4 +# CHECK-32: ff ff ff 47 bnez $s8, -4 +# CHECK-64: e0 07 00 40 beqz $s8, 4 +# CHECK-64: ff ff ff 47 bnez $s8, -4 + +# RUN: ld.lld %t.la32.o --defsym foo=_start+0x3ffffc --defsym bar=_start+4-0x400000 -o %t.la32.limits +# RUN: ld.lld %t.la64.o --defsym foo=_start+0x3ffffc --defsym bar=_start+4-0x400000 -o %t.la64.limits +# RUN: llvm-objdump -d %t.la32.limits | FileCheck --check-prefix=LIMITS-32 %s +# RUN: llvm-objdump -d %t.la64.limits | FileCheck --check-prefix=LIMITS-64 %s +# LIMITS-32: ef ff ff 43 beqz $s8, 4194300 +# 
LIMITS-32-NEXT: f0 03 00 44 bnez $s8, -4194304 +# LIMITS-64: ef ff ff 43 beqz $s8, 4194300 +# LIMITS-64-NEXT: f0 03 00 44 bnez $s8, -4194304 + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+0x400000 --defsym bar=_start+4-0x400004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+0x400000 --defsym bar=_start+4-0x400004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: relocation R_LARCH_B21 out of range: 4194304 is not in [-4194304, 4194303]; references foo +# ERROR-RANGE: relocation R_LARCH_B21 out of range: -4194308 is not in [-4194304, 4194303]; references bar + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+1 --defsym bar=_start-1 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+1 --defsym bar=_start-1 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# ERROR-ALIGN-1: improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# ERROR-ALIGN-2: improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes + +.global _start +_start: + beqz $s8, foo + bnez $s8, bar diff --git a/lld/test/ELF/loongarch-branch-b26.s b/lld/test/ELF/loongarch-branch-b26.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-branch-b26.s @@ -0,0 +1,40 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o --defsym foo=_start+4 --defsym bar=_start -o %t.la32 +# RUN: ld.lld %t.la64.o --defsym foo=_start+4 --defsym bar=_start -o 
%t.la64 +# RUN: llvm-objdump -d %t.la32 | FileCheck %s --check-prefix=CHECK-32 +# RUN: llvm-objdump -d %t.la64 | FileCheck %s --check-prefix=CHECK-64 +# CHECK-32: 00 04 00 50 b 4 +# CHECK-32: ff ff ff 57 bl -4 +# CHECK-64: 00 04 00 50 b 4 +# CHECK-64: ff ff ff 57 bl -4 + +# RUN: ld.lld %t.la32.o --defsym foo=_start+0x7fffffc --defsym bar=_start+4-0x8000000 -o %t.la32.limits +# RUN: ld.lld %t.la64.o --defsym foo=_start+0x7fffffc --defsym bar=_start+4-0x8000000 -o %t.la64.limits +# RUN: llvm-objdump -d %t.la32.limits | FileCheck --check-prefix=LIMITS-32 %s +# RUN: llvm-objdump -d %t.la64.limits | FileCheck --check-prefix=LIMITS-64 %s +# LIMITS-32: ff fd ff 53 b 134217724 +# LIMITS-32-NEXT: 00 02 00 54 bl -134217728 +# LIMITS-64: ff fd ff 53 b 134217724 +# LIMITS-64-NEXT: 00 02 00 54 bl -134217728 + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+0x8000000 --defsym bar=_start+4-0x8000004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+0x8000000 --defsym bar=_start+4-0x8000004 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: relocation R_LARCH_B26 out of range: 134217728 is not in [-134217728, 134217727]; references foo +# ERROR-RANGE: relocation R_LARCH_B26 out of range: -134217732 is not in [-134217728, 134217727]; references bar + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+1 --defsym bar=_start-1 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+1 --defsym bar=_start-1 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-1 %s +# ERROR-ALIGN-1: improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes + +# RUN: not ld.lld %t.la32.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# RUN: not ld.lld %t.la64.o --defsym foo=_start+2 --defsym bar=_start-2 -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN-2 %s +# 
ERROR-ALIGN-2: improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes + +.global _start +_start: + b foo + bl bar diff --git a/lld/test/ELF/loongarch-elf-flags.s b/lld/test/ELF/loongarch-elf-flags.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-elf-flags.s @@ -0,0 +1,8 @@ +# REQUIRES: loongarch + +# RUN: echo -n "BLOB" > %t.binary +# RUN: ld.lld -m elf64loongarch -b binary %t.binary -o %t.out +# RUN: llvm-readobj -h %t.out | FileCheck %s + +# CHECK: Flags [ +# CHECK-NEXT: ] diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl-shared-error.s b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared-error.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared-error.s @@ -0,0 +1,14 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: not ld.lld %t.la32.o -shared -o %t.la32.so 2>&1 | FileCheck %s +# RUN: not ld.lld %t.la64.o -shared -o %t.la64.so 2>&1 | FileCheck %s + +.global foo +foo: + pcalau12i $t0, %pc_hi20(bar) + ld.w $t0, $t0, %pc_lo12(bar) + +# CHECK: error: relocation R_LARCH_PCALA_LO12 cannot be used against symbol 'bar'; recompile with -fPIC diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s @@ -0,0 +1,49 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/a.s -o %t/a.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.la64.o + +# RUN: ld.lld %t/a.la32.o -shared -T %t/a.t -o %t/a.la32.so +# RUN: ld.lld %t/a.la64.o -shared -T %t/a.t -o %t/a.la64.so + +# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la32.so | FileCheck --check-prefixes=DIS,DIS32 %s +# RUN: llvm-objdump -d 
--no-show-raw-insn %t/a.la64.so | FileCheck --check-prefixes=DIS,DIS64 %s + +## PLT should be present in this case. +# DIS: Disassembly of section .plt: +# DIS: <.plt>: +# DIS: 234020: pcaddu12i $t3, 510 +# DIS32-NEXT: 234024: ld.w $t3, $t3, 84 +# DIS64-NEXT: 234024: ld.d $t3, $t3, 184 +# DIS-NEXT: 234028: jirl $t1, $t3, 0 +# DIS-NEXT: 23402c: nop + +# DIS: Disassembly of section .text: +# DIS: <foo>: +# DIS-NEXT: nop +# DIS-NEXT: nop +# DIS-NEXT: nop +# DIS-NEXT: pcalau12i $t0, -510 +# DIS-NEXT: jirl $zero, $t0, 32 + + +#--- a.t +SECTIONS { + .plt 0x234000: { *(.plt) } + .text 0x432000: { *(.text) } +} + +#--- a.s +.p2align 12 +.global foo +foo: +## The nops are for pushing the relocs off page boundary, to better see the +## page-aligned semantics in action. + nop + nop + nop + ## The offsets should be -510 (0x234 - 0x432) and 32 (PLT header size + 0) + ## respectively. + pcalau12i $t0, %pc_hi20(bar) + jirl $zero, $t0, %pc_lo12(bar) diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl.s b/lld/test/ELF/loongarch-pcala-lo12-jirl.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-pcala-lo12-jirl.s @@ -0,0 +1,42 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o -o %t.la32 +# RUN: ld.lld %t.la64.o -o %t.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck %s +# CHECK: pcalau12i $t0, -1 +# CHECK-NEXT: jirl $ra, $t0, 564 +# CHECK-NEXT: pcalau12i $t0, 0 +# CHECK-NEXT: jirl $zero, $t0, -1348 + +## PLT shouldn't get generated in this case. 
+# CHECK-NOT: Disassembly of section .plt: + +.p2align 12 +.org 0x234 +.global foo +foo: + li.w $a0, 42 + ret + +.org 0xabc +.global bar +bar: + li.w $a7, 94 + syscall 0 + +.org 0x1000 +.global _start +_start: +## The nops are for pushing the relocs off page boundary, to better see the +## page-aligned semantics in action. + nop + nop + nop + pcalau12i $t0, %pc_hi20(foo) + jirl $ra, $t0, %pc_lo12(foo) + pcalau12i $t0, %pc_hi20(bar) + jirl $zero, $t0, %pc_lo12(bar) diff --git a/lld/test/ELF/loongarch-plt.s b/lld/test/ELF/loongarch-plt.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/loongarch-plt.s @@ -0,0 +1,108 @@ +# REQUIRES: loongarch +# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t1.s -o %t1.32.o +# RUN: ld.lld -shared %t1.32.o -soname=t1.32.so -o %t1.32.so +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o +# RUN: ld.lld %t.32.o %t1.32.so -z separate-code -o %t.32 +# RUN: llvm-readelf -S -s %t.32 | FileCheck --check-prefixes=SEC,NM %s +# RUN: llvm-readobj -r %t.32 | FileCheck --check-prefix=RELOC32 %s +# RUN: llvm-readelf -x .got.plt %t.32 | FileCheck --check-prefix=GOTPLT32 %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=DIS,DIS32 %s + +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t1.s -o %t1.64.o +# RUN: ld.lld -shared %t1.64.o -soname=t1.64.so -o %t1.64.so +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o +# RUN: ld.lld %t.64.o %t1.64.so -z separate-code -o %t.64 +# RUN: llvm-readelf -S -s %t.64 | FileCheck --check-prefixes=SEC,NM %s +# RUN: llvm-readobj -r %t.64 | FileCheck --check-prefix=RELOC64 %s +# RUN: llvm-readelf -x .got.plt %t.64 | FileCheck --check-prefix=GOTPLT64 %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=DIS,DIS64 %s + +# SEC: .plt PROGBITS {{0*}}00020020 + +## A canonical PLT has a non-zero st_value. 
bar and weak are called but their +## addresses are not taken, so a canonical PLT is not necessary. +# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar +# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak + +## The .got.plt slots relocated by .rela.plt point to .plt +## This is required by glibc. +# RELOC32: .rela.plt { +# RELOC32-NEXT: 0x40070 R_LARCH_JUMP_SLOT bar 0x0 +# RELOC32-NEXT: 0x40074 R_LARCH_JUMP_SLOT weak 0x0 +# RELOC32-NEXT: } +# GOTPLT32: section '.got.plt' +# GOTPLT32-NEXT: 0x00040068 00000000 00000000 20000200 20000200 + +# RELOC64: .rela.plt { +# RELOC64-NEXT: 0x400E0 R_LARCH_JUMP_SLOT bar 0x0 +# RELOC64-NEXT: 0x400E8 R_LARCH_JUMP_SLOT weak 0x0 +# RELOC64-NEXT: } +# GOTPLT64: section '.got.plt' +# GOTPLT64-NEXT: 0x000400d0 00000000 00000000 00000000 00000000 +# GOTPLT64-NEXT: 0x000400e0 20000200 00000000 20000200 00000000 + +# DIS: <_start>: +## Direct call +## foo - . = 0x20010-0x20000 = 16 +# DIS-NEXT: 20000: bl 16 +## bar@plt - . = 0x20040-0x20004 = 60 +# DIS-NEXT: 20004: bl 60 +## bar@plt - . = 0x20040-0x20008 = 56 +# DIS-NEXT: 20008: bl 56 +## weak@plt - . = 0x20050-0x2000c = 68 +# DIS-NEXT: 2000c: bl 68 +# DIS: <foo>: +# DIS-NEXT: 20010: + +# DIS: Disassembly of section .plt: +# DIS: <.plt>: +## 32-bit: .got.plt - .plt = 0x40068 - 0x20020 = 4096*32+72 +# DIS32-NEXT: pcaddu12i $t2, 32 +# DIS32-NEXT: sub.w $t1, $t1, $t3 +# DIS32-NEXT: ld.w $t3, $t2, 72 +# DIS32-NEXT: addi.w $t1, $t1, -44 +# DIS32-NEXT: addi.w $t0, $t2, 72 +# DIS32-NEXT: srli.w $t1, $t1, 2 +# DIS32-NEXT: ld.w $t0, $t0, 4 +# DIS32-NEXT: jr $t3 + +## 64-bit: .got.plt - .plt = 0x400d0 - 0x20020 = 4096*32+176 +# DIS64-NEXT: pcaddu12i $t2, 32 +# DIS64-NEXT: sub.d $t1, $t1, $t3 +# DIS64-NEXT: ld.d $t3, $t2, 176 +# DIS64-NEXT: addi.d $t1, $t1, -44 +# DIS64-NEXT: addi.d $t0, $t2, 176 +# DIS64-NEXT: srli.d $t1, $t1, 1 +# DIS64-NEXT: ld.d $t0, $t0, 8 +# DIS64-NEXT: jr $t3 + +## 32-bit: &.got.plt[bar]-. = 0x40070-0x20040 = 4096*32+48 +## 64-bit: &.got.plt[bar]-. 
= 0x400e0-0x20040 = 4096*32+160 +# DIS: 20040: pcaddu12i $t3, 32 +# DIS32-NEXT: ld.w $t3, $t3, 48 +# DIS64-NEXT: ld.d $t3, $t3, 160 +# DIS-NEXT: jirl $t1, $t3, 0 +# DIS-NEXT: nop + +## 32-bit: &.got.plt[weak]-. = 0x40074-0x20050 = 4096*32+36 +## 64-bit: &.got.plt[weak]-. = 0x400e8-0x20050 = 4096*32+152 +# DIS: 20050: pcaddu12i $t3, 32 +# DIS32-NEXT: ld.w $t3, $t3, 36 +# DIS64-NEXT: ld.d $t3, $t3, 152 +# DIS-NEXT: jirl $t1, $t3, 0 +# DIS-NEXT: nop + +.global _start, foo, bar +.weak weak + +_start: + bl foo + bl bar + bl %plt(bar) + bl weak + +## foo is local and non-preemptible, no PLT is generated. +foo: + ret diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -68,6 +68,7 @@ 'ARM': 'arm', 'AVR': 'avr', 'Hexagon': 'hexagon', + 'LoongArch': 'loongarch', 'Mips': 'mips', 'MSP430': 'msp430', 'PowerPC': 'ppc',