diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -269,9 +269,9 @@
   case R_RISCV_TPREL_LO12_I:
   case R_RISCV_TPREL_LO12_S:
     return R_TPREL;
-  case R_RISCV_RELAX:
   case R_RISCV_TPREL_ADD:
     return R_NONE;
+  case R_RISCV_RELAX:
   case R_RISCV_ALIGN:
     return R_RELAX_HINT;
   default:
@@ -466,7 +466,7 @@
     break;
 
   case R_RISCV_RELAX:
-    return; // Ignored (for now)
+    return; // Processed in a later pass
 
   default:
     llvm_unreachable("unknown relocation");
@@ -476,6 +476,76 @@
 using AdjustRange = InputSectionBase::AdjustRange;
 using AdjustRanges = SmallVector;
 
+// Relax R_RISCV_CALL (auipc+jalr) to jal, c.j, or c.jal; 0 means not relaxed.
+static uint64_t relaxCall(InputSection *isec, Relocation &r, int64_t &delta,
+                          AdjustRanges &ranges) {
+  Defined *d = dyn_cast_or_null<Defined>(r.sym);
+  if (!d || !d->section)
+    return 0;
+
+  bool rvc = config->eflags & EF_RISCV_RVC;
+  uint64_t insnPair = read64le(isec->data().data() + r.offset);
+  unsigned rd = extractBits(insnPair, 32 + 11, 32 + 7);
+  uint64_t pc = isec->getVA(r.offset) + delta;
+  uint64_t target = d->getVA(r.addend);
+  int64_t displace = target - pc;
+
+  // Convert to c.j if displace fits in 12 bits.
+  if (rvc && isInt<12>(displace) && rd == 0) {
+    write16le(isec->mutableData().data() + r.offset, 0xa001);
+    r.type = R_RISCV_RVC_JUMP;
+    ranges.push_back({r.offset + 2, -6});
+    delta -= 6;
+    return insnPair;
+  }
+
+  // Convert to c.jal (RV32 only) if displace fits in 12 bits.
+  if (rvc && isInt<12>(displace) && rd == 1 && !config->is64) {
+    write16le(isec->mutableData().data() + r.offset, 0x2001);
+    r.type = R_RISCV_RVC_JUMP;
+    ranges.push_back({r.offset + 2, -6});
+    delta -= 6;
+    return insnPair;
+  }
+
+  // Convert to jal if displace fits in 21 bits.
+  if (isInt<21>(displace)) {
+    write32le(isec->mutableData().data() + r.offset, 0x0000006f | rd << 7);
+    r.type = R_RISCV_JAL;
+    ranges.push_back({r.offset + 4, -4});
+    delta -= 4;
+    return insnPair;
+  }
+  return 0;
+}
+
+// Verify that the displacement still fits within the smaller immediate
+// field of the relaxed instruction. If not, we will need to undo it.
+static bool stretchCall(InputSection *isec, Relocation &r, int64_t &delta,
+                        AdjustRanges &ranges) {
+  auto *d = dyn_cast_or_null<Defined>(r.sym);
+  assert(d && "r.sym not defined in stretchCall");
+  uint64_t pc = isec->getVA(r.offset) + delta;
+  uint64_t target = d->getVA(r.addend);
+  int64_t displace = target - pc;
+
+  if (r.type == R_RISCV_RVC_JUMP && !isInt<12>(displace)) {
+    // Undo c.j/c.jal: give back the 6 bytes removed by the relaxation.
+    r.type = R_RISCV_CALL;
+    ranges.push_back({r.offset, 6});
+    delta += 6;
+    return true;
+  }
+  if (r.type == R_RISCV_JAL && !isInt<21>(displace)) {
+    // Undo jal: give back the 4 bytes removed by the relaxation.
+    r.type = R_RISCV_CALL;
+    ranges.push_back({r.offset, 4});
+    delta += 4;
+    return true;
+  }
+  return false;
+}
+
 // As input, the addend of R_RISCV_ALIGN holds the number of NOP bytes emitted
 // by the compiler. We derive the desired alignment boundary by rounding this up
 // to the nearest power of two. The multi-pass relaxation algorithm needs two
@@ -490,10 +560,6 @@
   uint32_t boundary;
 };
 
-// NOTE: The code structure is more complex than necessary
-// for handling R_RISCV_ALIGN alone. It is designed to accommodate
-// call/jump/load/store/addr-arithmetic relocs in later diffs.
-
 // Derive & store alignment boundaries for all R_RISCV_ALIGN relocs
 static void setAlignBoundaries() {
   for (OutputSection *osec : outputSections)
@@ -539,15 +605,23 @@
 // multiple of 2, and we can mix 4-byte NOPs with 2-byte C.NOPs. In order to
 // reduce instruction count, we emit as many 4-byte NOPs as possible, then
 // if necessary, use a single 2-byte C.NOP to finish.
+//
+// Fill the gaps created by adding bytes (when delta > 0) to the section.
+// (1) After a relaxation was undone, restore the original instruction.
+// (2) After contractions and/or expansions of the address range that rounds-up
+//     to the alignment boundary, the sequence of NOPs emitted by the compiler
+//     could be corrupted. Repair by rewriting an optimal sequence of NOPs.
 void fillAdjustGaps() {
   for (OutputSection *osec : outputSections)
     for (InputSection *isec : getInputSections(*osec)) {
       if (!(isec->flags & SHF_EXECINSTR))
         continue;
 
-      for (Relocation &r : isec->relocations)
+      MutableArrayRef<Relocation> rels = isec->relocations;
+      for (auto it = rels.begin(); it != rels.end(); ++it) {
+        Relocation &r = *it;
+        uint8_t *buf = isec->mutableData().data() + r.offset;
         if (r.type == R_RISCV_ALIGN) {
-          uint8_t *buf = isec->mutableData().data() + r.offset;
           AlignAddend *aa = reinterpret_cast<AlignAddend *>(&r.addend);
           int keepNopBytes = aa->bytes;
           while (keepNopBytes > 0) {
@@ -564,7 +638,17 @@
             }
           }
           assert(keepNopBytes == 0);
+          continue;
+        }
+        if (!config->relax || it + 1 == rels.end() || it[1].addend == 0 ||
+            it[1].type != R_RISCV_NONE || r.offset != it[1].offset)
+          continue;
+        switch (r.type) {
+        case R_RISCV_CALL:
+          write64le(buf, it[1].addend);
+          break;
         }
+      }
     }
 }
 
@@ -578,11 +662,38 @@
     for (InputSection *isec : getInputSections(*osec)) {
       if (!(isec->flags & SHF_EXECINSTR))
         continue;
+
       int64_t delta = 0;
-      for (Relocation &r : isec->relocations) {
-        if (r.type == R_RISCV_ALIGN)
+      MutableArrayRef<Relocation> rels = isec->relocations;
+      for (auto it = rels.begin(); it != rels.end(); ++it) {
+        Relocation &r = *it;
+        if (r.type == R_RISCV_ALIGN) {
           relaxAlign(isec, r, delta, ranges);
-        // TODO(gkm): handle call/jump/load/store/addr-arithmetic relaxation
+          continue;
+        }
+        if (!config->relax || it + 1 == rels.end() ||
+            it[1].type != R_RISCV_RELAX || r.offset != it[1].offset)
+          continue;
+        switch (r.type) {
+        case R_RISCV_CALL:
+        case R_RISCV_CALL_PLT:
+          // relaxCall returns the original instruction pair when it relaxes.
+          // Stash it in R_RISCV_RELAX's addend so that the relaxation can be
+          // undone if a later pass pushes the target out of range.
+          if (uint64_t insnPair = relaxCall(isec, r, delta, ranges))
+            it[1].addend = insnPair; // might need for undo
+          break;
+        case R_RISCV_RVC_JUMP:
+        case R_RISCV_JAL:
+          // Only undo relaxations made by relaxCall, which are marked by the
+          // stashed instruction pair: a jump that came from the input has no
+          // original call to restore. Once a relaxation is undone, make it
+          // ineligible to redo, otherwise we risk oscillating undo/redo
+          // cycles that prevent the algorithm from terminating.
+          if (it[1].addend != 0 && stretchCall(isec, r, delta, ranges))
+            it[1].type = R_RISCV_NONE; // prevent redo
+          break;
+        }
       }
       if (!ranges.empty()) {
         isec->adjustRanges(ranges, sectionSymbolAddrs[isec]);
diff --git a/lld/test/ELF/riscv-relax-call-undo.s b/lld/test/ELF/riscv-relax-call-undo.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-call-undo.s
@@ -0,0 +1,105 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf \
+# RUN: -mattr=+relax %t/a.s -o %t/a.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf \
+# RUN: -mattr=+relax %t/z.s -o %t/z.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf \
+# RUN: -mattr=+relax %t/a.s -o %t/a.rv64.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf \
+# RUN: -mattr=+relax %t/z.s -o %t/z.rv64.o
+
+# A later relaxation pass can cause an earlier relaxation to go
+# out of range, necessitating undo. Verify that relaxations within
+# _stop4 and _stop5 are undone. The progression of address assignments
+# is detailed in the comments next to each instruction and alignment
+# directive. "--" marks a relaxation followed by new address range.
+# "++" prefixes an undone relaxation.
+# +# RUN: echo 'SECTIONS { .text 0x100000 : { */a.*.o } \ +# RUN: .tex2 0x200000 : { */z.*.o } }' > %t/undo.lds +# RUN: ld.lld -T %t/undo.lds %t/[az].rv32.o -o %t/undo.rv32 +# RUN: ld.lld -T %t/undo.lds %t/[az].rv64.o -o %t/undo.rv64 +# RUN: llvm-objdump -d %t/undo.rv32 > %t/undo.rv32.dis +# RUN: llvm-objdump -d %t/undo.rv64 > %t/undo.rv64.dis +# RUN: FileCheck %s < %t/undo.rv32.dis +# RUN: FileCheck %s < %t/undo.rv64.dis + +CHECK: <_start>: +CHECK-DAG: auipc ra, 256 +CHECK-DAG: jalr ra +CHECK-DAG: <_start1>: +CHECK-DAG: auipc ra, 256 +CHECK-DAG: jalr ra +CHECK-DAG: <_start2>: +CHECK-DAG: auipc ra, 256 +CHECK-DAG: jalr ra +CHECK-DAG: <_start3>: +CHECK-DAG: jal 0x200014 <_stop3> +CHECK-DAG: <_start4>: +CHECK-DAG: auipc ra, 256 +CHECK-DAG: jalr 4(ra) +CHECK-DAG: <_start5>: +CHECK-DAG: auipc ra, 256 +CHECK-DAG: jalr 4(ra) +CHECK-DAG: <_stop>: +CHECK-DAG: jal 0x100000 <_start> +CHECK-DAG: nop +CHECK-DAG: <_stop1>: +CHECK-DAG: jal 0x100008 <_start1> +CHECK-DAG: nop +CHECK-DAG: <_stop2>: +CHECK-DAG: jal 0x100010 <_start2> +CHECK-DAG: <_stop3>: +CHECK-DAG: jal 0x100018 <_start3> +CHECK-DAG: nop +CHECK-DAG: nop +CHECK-DAG: <_stop4>: +CHECK-DAG: auipc ra, 1048320 +CHECK-DAG: jalr -4(ra) +CHECK-DAG: <_stop5>: +CHECK-DAG: auipc ra, 1048320 +CHECK-DAG: jalr -4(ra) + + +##################### original pass 1 pass 2 + +#--- a.s +.text +.global _start, _start1, _start2, _start3, _start4, _start5 +_start: + call _stop # [0x00, 0x08) +_start1: + call _stop1 # [0x08, 0x10) +_start2: + call _stop2 # [0x10, 0x18) +_start3: + call _stop3 # [0x18, 0x20) -- [0x18, 0x1c) +_start4: + call _stop4 # [0x20, 0x28) [0x1c, 0x24) +_start5: + call _stop5 # [0x28, 0x30) [0x24, 0x2c) + +##################### original pass 1 pass 2 + +#--- z.s +.text +.global _stop, _stop1, _stop2, _stop3, _stop4, _stop5 +_stop: + call _start # [0x00, 0x08) -- [0x00, 0x04) + nop # [0x08, 0x0c) [0x04, 0x08) +_stop1: + call _start1 # [0x0c, 0x14) -- [0x08, 0x0c) +.balign 8 # [0x14, 0x18) [0x0c, 0x10) +_stop2: + call 
_start2 # [0x18, 0x20) -- [0x10, 0x14) +_stop3: + call _start3 # [0x20, 0x28) -- [0x14, 0x18) +.balign 16 # [0x28, 0x34) -- [0x18, 0x20) +_stop4: + call _start4 # [0x34, 0x38) -- [0x20, 0x24) ++ [0x20, 0x28) +_stop5: + call _start5 # [0x38, 0x44) -- [0x24, 0x28) ++ [0x28, 0x30) + +##################### original pass 1 pass 2 diff --git a/lld/test/ELF/riscv-relax-call.s b/lld/test/ELF/riscv-relax-call.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/riscv-relax-call.s @@ -0,0 +1,99 @@ +# REQUIRES: riscv +# RUN: rm -rf %t && mkdir -p %t + +# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o %t/rv32.o +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o %t/rv64.o +# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o %t/rv32c.o +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o %t/rv64c.o + +# jal relaxation +# +# RUN: ld.lld %t/rv32.o --defsym foo=_start+0x14 -o %t/jal.rv32 +# RUN: ld.lld %t/rv64.o --defsym foo=_start+0x14 -o %t/jal.rv64 +# RUN: llvm-objdump -d -M no-aliases %t/jal.rv32 > %t/jal.rv32.dis +# RUN: llvm-objdump -d -M no-aliases %t/jal.rv64 > %t/jal.rv64.dis +# RUN: FileCheck --check-prefix=JAL %s < %t/jal.rv32.dis +# RUN: FileCheck --check-prefix=JAL %s < %t/jal.rv64.dis + +# Don't relax to c.j/c.jal if out of range +# +# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x1004 -o %t/nocj.rv32c +# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x1004 -o %t/nocj.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/nocj.rv32c > %t/nocj.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/nocj.rv64c > %t/nocj.rv64c.dis +# RUN: FileCheck --check-prefix=JAL %s < %t/nocj.rv32c.dis +# RUN: FileCheck --check-prefix=JAL %s < %t/nocj.rv64c.dis + +# JAL: jal ra, {{.*}} +# JAL-NEXT: jal zero, {{.*}} + +# c.j and c.jal (RV32C-only) relaxation +# +# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x14 -o %t/cj.rv32c +# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x14 -o 
%t/cj.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/cj.rv32c > %t/cj.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/cj.rv64c > %t/cj.rv64c.dis +# RUN: FileCheck --check-prefix=CJ32 %s < %t/cj.rv32c.dis +# RUN: FileCheck --check-prefix=CJ64 %s < %t/cj.rv64c.dis + +# Check relaxation works across output sections +# +# RUN: echo 'SECTIONS { .text 0x100000 : { *(.text) } \ +# RUN: .foo : ALIGN(8) { foo = .; } }' > %t/xs.lds +# RUN: ld.lld -T %t/xs.lds %t/rv32c.o -o %t/xs.rv32c +# RUN: ld.lld -T %t/xs.lds %t/rv64c.o -o %t/xs.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/xs.rv32c > %t/xs.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/xs.rv64c > %t/xs.rv64c.dis +# RUN: FileCheck --check-prefix=CJ32 %s < %t/xs.rv32c.dis +# RUN: FileCheck --check-prefix=CJ64 %s < %t/xs.rv64c.dis + +# CJ32: c.jal {{.*}} +# CJ32-NEXT: c.j {{.*}} +# CJ64: jal ra, {{.*}} +# CJ64-NEXT: c.j {{.*}} + +# Don't relax if out of range. +# (call is out of range, tail is within range) +# +# RUN: ld.lld %t/rv32c.o --defsym foo=_start+0x100000 -o %t/boundary.rv32c +# RUN: ld.lld %t/rv64c.o --defsym foo=_start+0x100000 -o %t/boundary.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/boundary.rv32c > %t/boundary.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/boundary.rv64c > %t/boundary.rv64c.dis +# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/boundary.rv32c.dis +# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/boundary.rv64c.dis + +# Check that section alignment is factored-into the call displacement. 
+# (call is out of range, tail is within range) +# +# RUN: echo 'SECTIONS { .text 0x100000 : { *(.text); } \ +# RUN: .foo : ALIGN(0x100000) { foo = .; } }' > %t/xalign.lds +# RUN: ld.lld -T %t/xalign.lds %t/rv32c.o -o %t/xalign.rv32c +# RUN: ld.lld -T %t/xalign.lds %t/rv64c.o -o %t/xalign.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/xalign.rv32c > %t/xalign.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/xalign.rv64c > %t/xalign.rv64c.dis +# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/xalign.rv32c.dis +# RUN: FileCheck --check-prefix=BOUNDARY %s < %t/xalign.rv64c.dis + +# BOUNDARY: auipc ra, 256 +# BOUNDARY-NEXT: jalr ra, 0(ra) +# BOUNDARY-NEXT: jal zero, {{.*}} + +# Don't relax to absolute symbols +# +# RUN: ld.lld %t/rv32c.o -Ttext=0x100000 --defsym foo=0x100000 -o %t/abs.rv32c +# RUN: ld.lld %t/rv64c.o -Ttext=0x100000 --defsym foo=0x100000 -o %t/abs.rv64c +# RUN: llvm-objdump -d -M no-aliases %t/abs.rv32c > %t/abs.rv32c.dis +# RUN: llvm-objdump -d -M no-aliases %t/abs.rv64c > %t/abs.rv64c.dis +# RUN: FileCheck --check-prefix=NORELAX %s < %t/abs.rv32c.dis +# RUN: FileCheck --check-prefix=NORELAX %s < %t/abs.rv64c.dis + +# NORELAX: auipc ra, {{.*}} +# NORELAX-NEXT: jalr ra, {{.*}}(ra) +# NORELAX: auipc t1, {{.*}} +# NORELAX-NEXT: jalr zero, {{.*}}(t1) + +.global _start +.p2align 3 +_start: + call foo + tail foo