diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -33,6 +33,8 @@ void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; + void writeTableJumpHeader(uint8_t *buf) const override; + void writeTableJump(uint8_t *buf, const uint64_t symbol) const override; RelType getDynRel(RelType type) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; @@ -233,6 +235,20 @@ write32le(buf + 12, itype(ADDI, 0, 0, 0)); } +void RISCV::writeTableJumpHeader(uint8_t *buf) const { + if (config->is64) + write64le(buf, mainPart->dynamic->getVA()); + else + write32le(buf, mainPart->dynamic->getVA()); +} + +void RISCV::writeTableJump(uint8_t *buf, const uint64_t address) const { + if (config->is64) + write64le(buf, address); + else + write32le(buf, address); +} + RelType RISCV::getDynRel(RelType type) const { return type == target->symbolicRel ? 
type : static_cast<RelType>(R_RISCV_NONE);
@@ -358,6 +374,9 @@
   }
   case R_RISCV_JAL: {
+    if (config->riscvTbljal && (read16le(loc) & 0xfc03) == 0xa002)
+      return;
+
     checkInt(loc, val, 21, rel);
     checkAlignment(loc, val, 2, rel);
@@ -570,10 +589,23 @@
       sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
       sec.relaxAux->writes.push_back(0x2001); // c.jal
       remove = 6;
-    } else if (isInt<21>(displace)) {
-      sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
-      sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
-      remove = 4;
+    } else {
+      int tblEntryIndex = -1;
+      if (config->riscvTbljal && rd == 0)
+        tblEntryIndex = in.riscvTableJumpSection->getCMJTEntryIndex(*r.sym);
+      else if (config->riscvTbljal && rd == X_RA)
+        tblEntryIndex = in.riscvTableJumpSection->getCMJALTEntryIndex(*r.sym);
+
+      if (tblEntryIndex >= 0) {
+        sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
+        sec.relaxAux->writes.push_back(
+            0xa002 | (tblEntryIndex << 2)); // cm.jt or cm.jalt
+        remove = 6;
+      } else if (isInt<21>(displace)) {
+        sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
+        sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
+        remove = 4;
+      }
     }
   }
@@ -777,8 +809,14 @@
       write16le(p, aux.writes[writesIdx++]);
       break;
     case R_RISCV_JAL:
-      skip = 4;
-      write32le(p, aux.writes[writesIdx++]);
+      if (config->riscvTbljal &&
+          (aux.writes[writesIdx] & 0xfc03) == 0xa002) {
+        skip = 2;
+        write16le(p, aux.writes[writesIdx++]);
+      } else {
+        skip = 4;
+        write32le(p, aux.writes[writesIdx++]);
+      }
       break;
     case R_RISCV_32:
       // Used by relaxTlsLe to write a uint32_t then suppress the handling
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -218,6 +218,7 @@
   bool relocatable;
   bool relrGlibc = false;
   bool relrPackDynRelocs = false;
+  bool riscvTbljal;
   llvm::DenseSet<llvm::StringRef> saveTempsArgs;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
   bool singleRoRx;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1233,6 +1233,7 @@
   config->whyExtract =
args.getLastArgValue(OPT_why_extract);
   config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
+  config->riscvTbljal = args.hasArg(OPT_riscv_tbljal);
   config->zForceBti = hasZOption(args, "force-bti");
   config->zForceIbt = hasZOption(args, "force-ibt");
   config->zGlobal = hasZOption(args, "global");
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -394,7 +394,7 @@
   template <class ELFT> void copyShtGroup(uint8_t *buf);
 };
 
-static_assert(sizeof(InputSection) <= 160, "InputSection is too big");
+static_assert(sizeof(InputSection) <= 168, "InputSection is too big");
 
 class SyntheticSection : public InputSection {
 public:
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -317,6 +317,9 @@
     "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
     "Use SHT_RELR / DT_RELR* tags (default)">;
 
+def riscv_tbljal: F<"riscv-tbljal">,
+  HelpText<"(RISCV only) Enable table jump instructions from the Zce extension">;
+
 def pic_veneer: F<"pic-veneer">,
   HelpText<"Always generate position independent thunks (veneers)">;
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -28,6 +28,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Threading.h"
+#include <map>
 
 namespace lld::elf {
 class Defined;
@@ -364,6 +365,50 @@
   SmallVector<const Symbol *, 0> entries;
 };
 
+class TableJumpSection final : public SyntheticSection {
+public:
+  TableJumpSection();
+  size_t getSize() const override;
+  void writeTo(uint8_t *buf) override;
+  bool isNeeded() const override;
+  void finalizeContents() override;
+
+  void addCMJTEntryCandidate(const Symbol &symbol);
+  int getCMJTEntryIndex(const Symbol &symbol);
+  void addCMJALTEntryCandidate(const Symbol &symbol);
+  int
getCMJALTEntryIndex(const Symbol &symbol);
+  void scanTableJumpEntrys(const InputSection &sec) const;
+
+  // Flag to force TableJump to be in output if we have relocations
+  // that relies on its address.
+  bool hasTableJumpOffRel = false;
+
+protected:
+  uint64_t size = 0;
+
+private:
+  void addEntry(const Symbol &symbol, std::map<std::string, int> &entriesList);
+  uint32_t getEntry(const Symbol &symbol,
+                    std::vector<std::pair<std::string, int>> &entriesList);
+  void writeEntries(uint8_t *buf,
+                    std::vector<std::pair<std::string, int>> &entriesList);
+  void padUntil(uint8_t *buf, const uint8_t index);
+
+  const size_t xlen = config->is64 ? 64 : 32;
+
+  std::map<std::string, int> CMJTEntryCandidates;
+  std::vector<std::pair<std::string, int>> finalizedCMJTEntries;
+  std::map<std::string, int> CMJALTEntryCandidates;
+  std::vector<std::pair<std::string, int>> finalizedCMJALTEntries;
+
+  // used in finalizeContents function.
+  const size_t maxCMJTEntrySize = 64;
+  const size_t maxCMJALTEntrySize = 192;
+
+  const size_t startCMJTEntryIdx = 0;
+  const size_t startCMJALTEntryIdx = 64;
+};
+
 // The IgotPltSection is a Got associated with the PltSection for GNU Ifunc
 // Symbols that will be relocated by Target->IRelativeRel.
// On most Targets the IgotPltSection will immediately follow the GotPltSection
@@ -1279,6 +1324,7 @@
   std::unique_ptr<GotPltSection> gotPlt;
   std::unique_ptr<IgotPltSection> igotPlt;
   std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
+  std::unique_ptr<TableJumpSection> riscvTableJumpSection;
   std::unique_ptr<SyntheticSection> mipsAbiFlags;
   std::unique_ptr<MipsGotSection> mipsGot;
   std::unique_ptr<SyntheticSection> mipsOptions;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1161,6 +1161,130 @@
   return !entries.empty() || hasGotPltOffRel;
 }
 
+TableJumpSection::TableJumpSection()
+    : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
+                       config->wordsize, "__tbljalvec_base$") {}
+
+void TableJumpSection::addCMJTEntryCandidate(const Symbol &symbol) {
+  addEntry(symbol, CMJTEntryCandidates);
+}
+
+int TableJumpSection::getCMJTEntryIndex(const Symbol &symbol) {
+  uint32_t index = getEntry(symbol, finalizedCMJTEntries);
+  return index < maxCMJTEntrySize ? (int)(startCMJTEntryIdx + index) : -1;
+}
+
+void TableJumpSection::addCMJALTEntryCandidate(const Symbol &symbol) {
+  addEntry(symbol, CMJALTEntryCandidates);
+}
+
+int TableJumpSection::getCMJALTEntryIndex(const Symbol &symbol) {
+  uint32_t index = getEntry(symbol, finalizedCMJALTEntries);
+  return index < maxCMJALTEntrySize ? (int)(startCMJALTEntryIdx + index) : -1;
+}
+
+void TableJumpSection::addEntry(const Symbol &symbol,
+                                std::map<std::string, int> &entriesList) {
+  ++entriesList[symbol.getName().str()];
+}
+
+uint32_t TableJumpSection::getEntry(
+    const Symbol &symbol,
+    std::vector<std::pair<std::string, int>> &entriesList) {
+  // Prevent adding duplicate entries
+  uint32_t i = 0;
+  for (; i < entriesList.size(); ++i) {
+    // If this is a duplicate addition, do not add it and return the address
+    // offset of the original entry.
+    if (symbol.getName().compare(entriesList[i].first) == 0) {
+      return i;
+    }
+  }
+  return i;
+}
+
+void TableJumpSection::scanTableJumpEntrys(const InputSection &sec) const {
+  for (auto [i, r] : llvm::enumerate(sec.relocations)) {
+    switch (r.type) {
+    // auipc + jalr pair
+    case R_RISCV_CALL:
+    case R_RISCV_CALL_PLT: {
+      const auto jalr = sec.data()[r.offset + 4];
+      const uint8_t rd = (jalr & ((1ULL << (11 + 1)) - 1)) >> 7;
+      if (rd == 0)
+        in.riscvTableJumpSection->addCMJTEntryCandidate(*r.sym);
+      else if (rd == 1)
+        in.riscvTableJumpSection->addCMJALTEntryCandidate(*r.sym);
+      else
+        return; // Unknown link register, do not modify.
+    }
+    }
+  }
+}
+
+void TableJumpSection::finalizeContents() {
+  auto cmp = [](const std::pair<std::string, int> &p1,
+                const std::pair<std::string, int> &p2) {
+    return p1.second > p2.second;
+  };
+
+  std::copy(CMJTEntryCandidates.begin(), CMJTEntryCandidates.end(),
+            std::back_inserter(finalizedCMJTEntries));
+  std::sort(finalizedCMJTEntries.begin(), finalizedCMJTEntries.end(), cmp);
+  if (finalizedCMJTEntries.size() > maxCMJTEntrySize)
+    finalizedCMJTEntries.resize(maxCMJTEntrySize);
+
+  std::copy(CMJALTEntryCandidates.begin(), CMJALTEntryCandidates.end(),
+            std::back_inserter(finalizedCMJALTEntries));
+  std::sort(finalizedCMJALTEntries.begin(), finalizedCMJALTEntries.end(), cmp);
+  if (finalizedCMJALTEntries.size() > maxCMJALTEntrySize)
+    finalizedCMJALTEntries.resize(maxCMJALTEntrySize);
+}
+
+size_t TableJumpSection::getSize() const {
+  if (!CMJALTEntryCandidates.empty()) {
+    return (startCMJALTEntryIdx + CMJALTEntryCandidates.size()) * xlen;
+  }
+  return (startCMJTEntryIdx + CMJTEntryCandidates.size()) * xlen;
+}
+
+void TableJumpSection::writeTo(uint8_t *buf) {
+  target->writeTableJumpHeader(buf);
+  writeEntries(buf + startCMJTEntryIdx, finalizedCMJTEntries);
+  padUntil(buf + ((startCMJTEntryIdx + finalizedCMJTEntries.size()) * xlen),
+           startCMJALTEntryIdx * xlen);
+  writeEntries(buf + startCMJALTEntryIdx, finalizedCMJALTEntries);
+}
+
+void
TableJumpSection::padUntil(uint8_t *buf, const uint8_t address) {
+  for (size_t i = 0; i < address; ++i) {
+    if (config->is64)
+      write64le(buf, 0);
+    else
+      write32le(buf, 0);
+  }
+}
+
+void TableJumpSection::writeEntries(
+    uint8_t *buf, std::vector<std::pair<std::string, int>> &entriesList) {
+  for (const auto &symbolName : entriesList) {
+    // Use the symbol from in.symTab to ensure we have the final adjusted
+    // symbol.
+    for (const auto &symbol : in.symTab->getSymbols()) {
+      if (symbol.sym->getName() != symbolName.first)
+        continue;
+      // Only process defined symbols.
+      auto *definedSymbol = dyn_cast<Defined>(symbol.sym);
+      if (!definedSymbol)
+        continue;
+      target->writeTableJump(buf, definedSymbol->getVA());
+      buf += config->wordsize;
+    }
+  }
+}
+
+bool TableJumpSection::isNeeded() const { return config->riscvTbljal; }
+
 static StringRef getIgotPltName() {
   // On ARM the IgotPltSection is part of the GotSection.
   if (config->emachine == EM_ARM)
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -33,6 +33,8 @@
   virtual void writeGotPltHeader(uint8_t *buf) const {}
   virtual void writeGotHeader(uint8_t *buf) const {}
   virtual void writeGotPlt(uint8_t *buf, const Symbol &s) const {};
+  virtual void writeTableJumpHeader(uint8_t *buf) const {};
+  virtual void writeTableJump(uint8_t *buf, const uint64_t symbol) const {};
   virtual void writeIgotPlt(uint8_t *buf, const Symbol &s) const {}
   virtual int64_t getImplicitAddend(const uint8_t *buf, RelType type) const;
   virtual int getTlsGdRelaxSkip(RelType type) const { return 1; }
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -449,6 +449,15 @@
     add(*in.ppc64LongBranchTarget);
   }
 
+  if (config->emachine == EM_RISCV && config->riscvTbljal) {
+    in.riscvTableJumpSection = std::make_unique<TableJumpSection>();
+    add(*in.riscvTableJumpSection);
+
+    symtab->addSymbol(Defined{
+        /*file=*/nullptr, "__tbljalvec_base$", STB_GLOBAL, STT_NOTYPE, STT_NOTYPE,
+        /*value=*/0, /*size=*/0,
in.riscvTableJumpSection.get()});
+  }
+
   in.gotPlt = std::make_unique<GotPltSection>();
   add(*in.gotPlt);
   in.igotPlt = std::make_unique<IgotPltSection>();
@@ -1625,6 +1634,15 @@
   if (config->emachine == EM_HEXAGON)
     hexagonTLSSymbolUpdate(outputSections);
 
+  // scan all R_RISCV_CALL/R_RISCV_CALL_PLT for RISCV Zcmt Jump table.
+  if (in.riscvTableJumpSection) {
+    for (InputSectionBase *inputSection : inputSections) {
+      in.riscvTableJumpSection->scanTableJumpEntrys(
+          cast<InputSection>(*inputSection));
+    }
+    in.riscvTableJumpSection->finalizeContents();
+  }
+
   uint32_t pass = 0, assignPasses = 0;
   for (;;) {
     bool changed = target->needsThunks ? tc.createThunks(pass, outputSections)
@@ -2066,6 +2084,7 @@
   finalizeSynthetic(in.mipsGot.get());
   finalizeSynthetic(in.igotPlt.get());
   finalizeSynthetic(in.gotPlt.get());
+  finalizeSynthetic(in.riscvTableJumpSection.get());
   finalizeSynthetic(in.relaIplt.get());
   finalizeSynthetic(in.relaPlt.get());
   finalizeSynthetic(in.plt.get());
@@ -2834,8 +2853,11 @@
 template <class ELFT> void Writer<ELFT>::writeSectionsBinary() {
   parallel::TaskGroup tg;
   for (OutputSection *sec : outputSections)
-    if (sec->flags & SHF_ALLOC)
+    if (sec->flags & SHF_ALLOC) {
       sec->writeTo<ELFT>(Out::bufferStart + sec->offset, tg);
+      if (config->emachine == EM_RISCV && config->riscvTbljal)
+        in.riscvTableJumpSection->writeTo(Out::bufferStart + sec->offset);
+    }
 }
 
 static void fillTrap(uint8_t *i, uint8_t *end) {
diff --git a/lld/test/ELF/riscv-tbljal-call.s b/lld/test/ELF/riscv-tbljal-call.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-call.s
@@ -0,0 +1,97 @@
+# REQUIRES: riscv
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv64.o
+
+# tbljal conversion
+# RUN: ld.lld %t.rv32.o -riscv-tbljal --defsym foo=_start+30 -o %t.rv32
+# RUN: ld.lld %t.rv64.o -riscv-tbljal --defsym foo=_start+30 -o %t.rv64
+# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt
--no-show-raw-insn %t.rv32 | FileCheck --check-prefix=TBLJAL %s +# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=TBLJAL %s +# TBLJAL: cm.jalt 66 +# TBLJAL-NEXT: cm.jt 2 +# TBLJAL-NEXT: cm.jalt 67 +# TBLJAL-NEXT: cm.jalt 65 +# TBLJAL-NEXT: cm.jalt 65 +# TBLJAL-NEXT: cm.jalt 64 +# TBLJAL-NEXT: cm.jalt 64 +# TBLJAL-NEXT: cm.jalt 64 +# TBLJAL-NEXT: cm.jt 3 +# TBLJAL-NEXT: cm.jt 1 +# TBLJAL-NEXT: cm.jt 1 +# TBLJAL-NEXT: cm.jt 0 +# TBLJAL-NEXT: cm.jt 0 +# TBLJAL-NEXT: cm.jt 0 + +# Check the bounds of what would be out of range (for the first call) for other jump types. +# RUN: ld.lld %t.rv32.o -riscv-tbljal --defsym foo=_start+0x100000 -o %t-boundary.rv32 +# RUN: ld.lld %t.rv64.o -riscv-tbljal --defsym foo=_start+0x100000 -o %t-boundary.rv64 +# RUN: ld.lld %t.rv32.o --defsym foo=_start+0x100000 -o %t-oldboundary.rv32 +# RUN: ld.lld %t.rv64.o --defsym foo=_start+0x100000 -o %t-oldboundary.rv64 +# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t-boundary.rv32 | FileCheck --check-prefix=BOUNDARY %s +# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t-boundary.rv64 | FileCheck --check-prefix=BOUNDARY %s +# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t-oldboundary.rv32 | FileCheck --check-prefix=OLDBOUNDARY %s +# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t-oldboundary.rv64 | FileCheck --check-prefix=OLDBOUNDARY %s +# OLDBOUNDARY: auipc ra, 256 +# OLDBOUNDARY-NEXT: jalr ra, 0(ra) +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal ra, {{.*}} +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# OLDBOUNDARY-NEXT: 
jal zero, {{.*}} +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# OLDBOUNDARY-NEXT: jal zero, {{.*}} +# BOUNDARY: cm.jalt 66 +# BOUNDARY-NEXT: cm.jt 2 +# BOUNDARY-NEXT: cm.jalt 67 +# BOUNDARY-NEXT: cm.jalt 65 +# BOUNDARY-NEXT: cm.jalt 65 +# BOUNDARY-NEXT: cm.jalt 64 +# BOUNDARY-NEXT: cm.jalt 64 +# BOUNDARY-NEXT: cm.jalt 64 +# BOUNDARY-NEXT: cm.jt 3 +# BOUNDARY-NEXT: cm.jt 1 +# BOUNDARY-NEXT: cm.jt 1 +# BOUNDARY-NEXT: cm.jt 0 +# BOUNDARY-NEXT: cm.jt 0 +# BOUNDARY-NEXT: cm.jt 0 + +# Check relaxation works across output sections +# echo 'SECTIONS { .text 0x100000 : { *(.text) } .foo : ALIGN(8) { foo = .; } }' > %t-cross-section.lds +# ld.lld %t.rv32c.o %t-cross-section.lds -o %t-cross-section.rv32 +# ld.lld %t.rv64c.o %t-cross-section.lds -o %t-cross-section.rv64 + +.global _start +.p2align 3 +_start: + call foo + tail foo + + call foo_1 + call foo_2 + call foo_2 + call foo_3 + call foo_3 + call foo_3 + tail foo_1 + tail foo_2 + tail foo_2 + tail foo_3 + tail foo_3 + tail foo_3 + +foo_1: + nop + +foo_2: + nop + +foo_3: + nop + diff --git a/lld/test/ELF/riscv-tbljal-syms.s b/lld/test/ELF/riscv-tbljal-syms.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/riscv-tbljal-syms.s @@ -0,0 +1,32 @@ +# REQUIRES: riscv + +// Check that relaxation correctly adjusts symbol addresses and sizes. 
+ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv32.o +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv64.o +# RUN: ld.lld -Ttext=0x100000 -riscv-tbljal %t.rv32.o -o %t.rv32 +# RUN: ld.lld -Ttext=0x100000 -riscv-tbljal %t.rv64.o -o %t.rv64 + +# RUN: llvm-readelf -s %t.rv32 | FileCheck %s +# RUN: llvm-readelf -s %t.rv64 | FileCheck %s + +# CHECK: 100000 4 NOTYPE LOCAL DEFAULT 1 a +# CHECK: 100000 6 NOTYPE LOCAL DEFAULT 1 b +# CHECK: 100004 2 NOTYPE LOCAL DEFAULT 1 c +# CHECK: 100004 6 NOTYPE LOCAL DEFAULT 1 d +# CHECK: 100000 10 NOTYPE GLOBAL DEFAULT 1 _start + +.global _start +_start: +a: +b: + add a0, a1, a2 +.size a, . - a +c: +d: + call _start +.size b, . - b +.size c, . - c + add a0, a1, a2 +.size d, . - d +.size _start, . - _start