diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -269,9 +269,9 @@
   case R_RISCV_TPREL_LO12_I:
   case R_RISCV_TPREL_LO12_S:
     return R_TPREL;
-  case R_RISCV_RELAX:
   case R_RISCV_TPREL_ADD:
     return R_NONE;
+  case R_RISCV_RELAX:
   case R_RISCV_ALIGN:
     return R_RELAX_HINT;
   default:
@@ -466,7 +466,7 @@
     break;
 
   case R_RISCV_RELAX:
-    return; // Ignored (for now)
+    return; // Processed in a later pass
 
   default:
     llvm_unreachable("unknown relocation");
@@ -475,6 +475,53 @@
 
 using AdjustRanges = std::vector;
 
+// Relax the auipc+jalr pair of an R_RISCV_CALL/R_RISCV_CALL_PLT relocation to
+// a single shorter jump: c.j or c.jal (2 bytes) when the RVC extension is
+// enabled and the offset fits in 12 bits, otherwise jal (4 bytes) when the
+// offset fits in 21 bits. On success the instruction is rewritten in place,
+// r.type becomes the matching jump relocation, the removed byte range is
+// appended to adjustRanges, and delta is decreased by the bytes removed.
+// Nothing changes if r.sym is not a Defined symbol with a section (so calls
+// to absolute symbols are left alone) or if the target is out of jal range.
+// NOTE(review): the range check uses sym->getVA(); a call that ends up going
+// through a PLT entry may see a different final displacement - confirm.
+static void relaxCall(InputSection *is, Relocation &r, int64_t &delta,
+                      AdjustRanges &adjustRanges) {
+  auto *sym = dyn_cast_or_null<Defined>(r.sym);
+  if (!sym || !sym->section)
+    return;
+
+  const bool rvc = config->eflags & EF_RISCV_RVC;
+  // The second instruction of the pair (jalr) supplies the link register rd.
+  const uint32_t jalr = read32le(is->data().data() + r.offset + 4);
+  const unsigned rd = extractBits(jalr, 11, 7);
+  // delta carries the size changes already made earlier in this section.
+  const uint64_t pc = is->getVA(r.offset) + delta;
+  const uint64_t target = sym->getVA(r.addend);
+  const int64_t offset = target - pc;
+
+  // Compressed forms: c.j needs rd == zero (x0); c.jal needs rd == ra (x1)
+  // and exists on RV32 only.
+  const bool isCJ = rd == 0;
+  const bool isCJal = rd == 1 && !config->is64;
+  if (rvc && isInt<12>(offset) && (isCJ || isCJal)) {
+    // 0xa001 is c.j and 0x2001 is c.jal, both with a zero immediate.
+    write16le(is->mutableData().data() + r.offset, isCJ ? 0xa001 : 0x2001);
+    r.type = R_RISCV_RVC_JUMP;
+    adjustRanges.push_back({r.offset + 2, -6});
+    delta -= 6;
+    return;
+  }
+
+  // Convert to jal rd if offset fits in 21 bits.
+  if (isInt<21>(offset)) {
+    write32le(is->mutableData().data() + r.offset, 0x0000006f | rd << 7);
+    r.type = R_RISCV_JAL;
+    adjustRanges.push_back({r.offset + 4, -4});
+    delta -= 4;
+  }
+}
+
 // As input, the addend of R_RISCV_ALIGN holds the number of NOP bytes emitted
 // by the compiler. We derive the desired alignment boundary by rounding this up
 // to the nearest power of two. The multi-pass relaxation algorithm needs two
@@ -505,10 +552,6 @@
   return r.addend & alignNopBytesMask;
 }
 
-// NOTE(gkm): The code structure is more complex than necessary
-// for handling R_RISCV_ALIGN alone. It is designed to accommodate
-// call/jump/load/store/addr-arithmetic relocs in later diffs.
-
 // Derive & store alignment boundaries for all R_RISCV_ALIGN relocs
 static void setAlignBoundaries() {
   for (OutputSection *os : outputSections)
@@ -581,11 +624,31 @@
 
   AdjustRanges adjustRanges;
   int64_t delta = 0;
-  for (Relocation &r : is->relocations)
-    if (r.type == R_RISCV_ALIGN && r.addend)
+  MutableArrayRef<Relocation> rels = is->relocations;
+  for (auto it = rels.begin(); it != rels.end(); ++it) {
+    Relocation &r = *it;
+    if (r.type == R_RISCV_ALIGN) {
+      if (r.addend)
         relaxAlign(is, r, delta, adjustRanges);
-
-  // TODO(gkm): handle call/jump/load/store/addr-arithmetic relocs here
+      continue;
+    }
+    // Only a relocation whose next entry is an R_RISCV_RELAX at the same
+    // offset is a candidate, and only when config->relax is set.
+    if (!config->relax || it + 1 == rels.end() ||
+        it[1].type != R_RISCV_RELAX || r.offset != it[1].offset)
+      continue;
+    switch (r.type) {
+    case R_RISCV_CALL:
+    case R_RISCV_CALL_PLT:
+      // FIXME(gkm): nullify it[1].type for relaxations that will
+      // never go out of range and don't need to be reassessed.
+      relaxCall(is, r, delta, adjustRanges);
+      break;
+    // FIXME(gkm): undo relaxations that go out of range
+    // case R_RISCV_RVC_JUMP:
+    // case R_RISCV_JAL:
+    }
+  }
 
   if (is->adjustRanges(adjustRanges)) {
     script->assignAddresses();
diff --git a/lld/test/ELF/riscv-relax-call.s b/lld/test/ELF/riscv-relax-call.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-call.s
@@ -0,0 +1,99 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o %t/rv64.o
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o %t/rv32c.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o %t/rv64c.o
+
+# jal relaxation
+#
+# RUN: ld.lld %t/rv32.o --defsym foo=_start+0x14 -o %t/jal.rv32
+# RUN: ld.lld %t/rv64.o --defsym foo=_start+0x14 -o %t/jal.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/jal.rv32 > %t/jal.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/jal.rv64 > %t/jal.rv64.dis
+# RUN: FileCheck --check-prefix=JAL %s < %t/jal.rv32.dis
+# RUN: FileCheck --check-prefix=JAL %s < %t/jal.rv64.dis
+
+# Don't relax to c.j/c.jal if out of range
+#
+# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x1004 -o %t/nocj.rv32c
+# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x1004 -o %t/nocj.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/nocj.rv32c > %t/nocj.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/nocj.rv64c > %t/nocj.rv64c.dis
+# RUN: FileCheck --check-prefix=JAL %s < %t/nocj.rv32c.dis
+# RUN: FileCheck --check-prefix=JAL %s < %t/nocj.rv64c.dis
+
+# JAL:      jal ra, {{.*}}
+# JAL-NEXT: jal zero, {{.*}}
+
+# c.j and c.jal (RV32C-only) relaxation
+#
+# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x14 -o %t/cj.rv32c
+# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x14 -o %t/cj.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/cj.rv32c > %t/cj.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/cj.rv64c > %t/cj.rv64c.dis
+# RUN: FileCheck --check-prefix=CJ32 %s < %t/cj.rv32c.dis
+# RUN: FileCheck --check-prefix=CJ64 %s < %t/cj.rv64c.dis
+
+# Check relaxation works across output sections
+#
+# RUN: echo 'SECTIONS { .text 0x100000 : { *(.text) } \
+# RUN:       .foo : ALIGN(8) { foo = .; } }' > %t/xs.lds
+# RUN: ld.lld -T %t/xs.lds %t/rv32c.o -o %t/xs.rv32c
+# RUN: ld.lld -T %t/xs.lds %t/rv64c.o -o %t/xs.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/xs.rv32c > %t/xs.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/xs.rv64c > %t/xs.rv64c.dis
+# RUN: FileCheck --check-prefix=CJ32 %s < %t/xs.rv32c.dis
+# RUN: FileCheck --check-prefix=CJ64 %s < %t/xs.rv64c.dis
+
+# CJ32:      c.jal {{.*}}
+# CJ32-NEXT: c.j {{.*}}
+# CJ64:      jal ra, {{.*}}
+# CJ64-NEXT: c.j {{.*}}
+
+# Don't relax if out of range.
+# (call is out of range, tail is within range)
+#
+# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x100000 -o %t/boundary.rv32c
+# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x100000 -o %t/boundary.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/boundary.rv32c > %t/boundary.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/boundary.rv64c > %t/boundary.rv64c.dis
+# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/boundary.rv32c.dis
+# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/boundary.rv64c.dis
+
+# Check that section alignment is factored into the call displacement.
+# (call is out of range, tail is within range)
+#
+# RUN: echo 'SECTIONS { .text 0x100000 : { *(.text); } \
+# RUN:       .foo : ALIGN(0x100000) { foo = .; } }' > %t/xalign.lds
+# RUN: ld.lld -T %t/xalign.lds %t/rv32c.o -o %t/xalign.rv32c
+# RUN: ld.lld -T %t/xalign.lds %t/rv64c.o -o %t/xalign.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/xalign.rv32c > %t/xalign.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/xalign.rv64c > %t/xalign.rv64c.dis
+# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/xalign.rv32c.dis
+# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/xalign.rv64c.dis
+
+# BOUNDARY:      auipc ra, 256
+# BOUNDARY-NEXT: jalr ra, 0(ra)
+# BOUNDARY-NEXT: jal zero, {{.*}}
+
+# Don't relax to absolute symbols
+#
+# RUN: ld.lld %t/rv32c.o -Ttext=0x100000 --defsym foo=0x100000 -o %t/abs.rv32c
+# RUN: ld.lld %t/rv64c.o -Ttext=0x100000 --defsym foo=0x100000 -o %t/abs.rv64c
+# RUN: llvm-objdump -d -M no-aliases %t/abs.rv32c > %t/abs.rv32c.dis
+# RUN: llvm-objdump -d -M no-aliases %t/abs.rv64c > %t/abs.rv64c.dis
+# RUN: FileCheck --check-prefix=NORELAX %s < %t/abs.rv32c.dis
+# RUN: FileCheck --check-prefix=NORELAX %s < %t/abs.rv64c.dis
+
+# NORELAX:      auipc ra, {{.*}}
+# NORELAX-NEXT: jalr ra, {{.*}}(ra)
+# NORELAX:      auipc t1, {{.*}}
+# NORELAX-NEXT: jalr zero, {{.*}}(t1)
+
+.global _start
+.p2align 3
+_start:
+  call foo
+  tail foo