diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -38,6 +38,8 @@
   void writePltHeader(uint8_t *buf) const override;
   void writePlt(uint8_t *buf, const Symbol &sym,
                 uint64_t pltEntryAddr) const override;
+  void writeTableJumpHeader(uint8_t *buf) const override;
+  void writeTableJump(uint8_t *buf, const uint64_t symbol) const override;
   RelType getDynRel(RelType type) const override;
   RelExpr getRelExpr(RelType type, const Symbol &s,
                      const uint8_t *loc) const override;
@@ -246,6 +248,20 @@
   write32le(buf + 12, itype(ADDI, 0, 0, 0));
 }
 
+// The Zcmt jump vector table has no header: slot 0 is already the first cm.jt
+// target, and the table may be empty, so this hook writes nothing. (It used
+// to store mainPart->dynamic's address; NOTE(review): that pointer is
+// presumably null for static links -- confirm.)
+void RISCV::writeTableJumpHeader(uint8_t *buf) const {}
+
+// Store one table slot: `address` as an XLEN-wide little-endian word.
+void RISCV::writeTableJump(uint8_t *buf, const uint64_t address) const {
+  if (config->is64)
+    write64le(buf, address);
+  else
+    write32le(buf, address);
+}
+
 RelType RISCV::getDynRel(RelType type) const {
   return type == target->symbolicRel ? type
                                      : static_cast<RelType>(R_RISCV_NONE);
@@ -318,9 +334,13 @@
 
   switch (rel.type) {
   case R_RISCV_32:
+    if (config->riscvTbljal && (read16le(loc) & 0xfc03) == 0xa002)
+      return; // already rewritten to cm.jt/cm.jalt by relaxation
     write32le(loc, val);
     return;
   case R_RISCV_64:
+    if (config->riscvTbljal && (read16le(loc) & 0xfc03) == 0xa002)
+      return; // already rewritten to cm.jt/cm.jalt by relaxation
     write64le(loc, val);
     return;
 
@@ -578,6 +598,33 @@
   }
 }
 
+// Rewrite the jump at `r` to a 2-byte cm.jt (rd == x0) or cm.jalt (rd == ra)
+// when its target has a finalized jump-table slot. Sets `remove` on success.
+static bool relaxZcmt(const InputSection &sec, size_t i, uint64_t loc,
+                      Relocation &r, uint32_t &remove) {
+  if (!in.riscvTableJumpSection || !in.riscvTableJumpSection->isFinalized)
+    return false;
+
+  // rd is in the jalr of an auipc+jalr pair (R_RISCV_CALL*, offset 4) or in
+  // the jal itself (R_RISCV_JAL); decode it from the whole 32-bit word.
+  const bool isCall = r.type != R_RISCV_JAL;
+  const uint32_t insn = read32le(sec.contentMaybeDecompress().data() +
+                                 r.offset + (isCall ? 4 : 0));
+  const uint32_t rd = extractBits(insn, 11, 7);
+  int tblEntryIndex = -1;
+  if (rd == 0)
+    tblEntryIndex = in.riscvTableJumpSection->getCMJTEntryIndex(r.sym);
+  else if (rd == X_RA)
+    tblEntryIndex = in.riscvTableJumpSection->getCMJALTEntryIndex(r.sym);
+  if (tblEntryIndex < 0)
+    return false;
+  sec.relaxAux->relocTypes[i] = config->is64 ? R_RISCV_64 : R_RISCV_32;
+  sec.relaxAux->writes.push_back(0xa002 |
+                                 (tblEntryIndex << 2)); // cm.jt or cm.jalt
+  remove = isCall ? 6 : 2; // an 8-byte pair or a 4-byte jal becomes 2 bytes
+  return true;
+}
+
 // Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
 static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
                       Relocation &r, uint32_t &remove) {
@@ -598,7 +645,7 @@
     sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
     sec.relaxAux->writes.push_back(0x2001); // c.jal
     remove = 6;
-  } else if (isInt<21>(displace)) {
+  } else if (!relaxZcmt(sec, i, loc, r, remove) && isInt<21>(displace)) {
     sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
     sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
     remove = 4;
@@ -715,6 +762,11 @@
       if (i + 1 != sec.relocs().size() &&
           sec.relocs()[i + 1].type == R_RISCV_RELAX)
         relaxHi20Lo12(sec, i, loc, r, remove);
+      break; // do not fall through into the Zcmt case
+    case R_RISCV_JAL:
+      if (i + 1 != sec.relocs().size() &&
+          sec.relocs()[i + 1].type == R_RISCV_RELAX)
+        relaxZcmt(sec, i, loc, r, remove);
       break;
     }
 
@@ -843,12 +895,25 @@
           skip = 4;
           write32le(p, aux.writes[writesIdx++]);
           break;
+        case R_RISCV_64:
+          if (config->riscvTbljal &&
+              (aux.writes[writesIdx] & 0xfc03) == 0xa002) {
+            skip = 2; // cm.jt/cm.jalt is a 16-bit instruction
+            write16le(p, aux.writes[writesIdx++]);
+          }
+          break;
         case R_RISCV_32:
-          // Used by relaxTlsLe to write a uint32_t then suppress the handling
-          // in relocateAlloc.
-          skip = 4;
-          write32le(p, aux.writes[writesIdx++]);
-          aux.relocTypes[i] = R_RISCV_NONE;
+          if (config->riscvTbljal &&
+              (aux.writes[writesIdx] & 0xfc03) == 0xa002) {
+            skip = 2; // cm.jt/cm.jalt is a 16-bit instruction
+            write16le(p, aux.writes[writesIdx++]);
+          } else {
+            // Used by relaxTlsLe to write a uint32_t then suppress the
+            // handling in relocateAlloc.
+            skip = 4;
+            write32le(p, aux.writes[writesIdx++]);
+            aux.relocTypes[i] = R_RISCV_NONE;
+          }
           break;
         default:
           llvm_unreachable("unsupported type");
@@ -1076,3 +1141,204 @@
   static RISCV target;
   return &target;
 }
+
+TableJumpSection::TableJumpSection()
+    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
+                       config->wordsize, ".riscv.jvt") {}
+
+void TableJumpSection::addCMJTEntryCandidate(const Symbol *symbol, int gain) {
+  addEntry(symbol, CMJTEntryCandidates, gain);
+}
+
+// Jump-table slot that cm.jt uses for `symbol`, or -1 if it has none.
+int TableJumpSection::getCMJTEntryIndex(const Symbol *symbol) {
+  uint32_t index = getEntry(symbol, maxCMJTEntrySize, finalizedCMJTEntries);
+  return index < finalizedCMJTEntries.size() ? (int)(startCMJTEntryIdx + index)
+                                             : -1;
+}
+
+void TableJumpSection::addCMJALTEntryCandidate(const Symbol *symbol, int gain) {
+  addEntry(symbol, CMJALTEntryCandidates, gain);
+}
+
+// Jump-table slot that cm.jalt uses for `symbol`, or -1 if it has none.
+int TableJumpSection::getCMJALTEntryIndex(const Symbol *symbol) {
+  uint32_t index = getEntry(symbol, maxCMJALTEntrySize, finalizedCMJALTEntries);
+  return index < finalizedCMJALTEntries.size()
+             ? (int)(startCMJALTEntryIdx + index)
+             : -1;
+}
+
+// Add `gain` (bytes a table jump would save) to the running total of `symbol`.
+void TableJumpSection::addEntry(
+    const Symbol *symbol, llvm::DenseMap<const Symbol *, int> &entriesList,
+    int gain) {
+  entriesList[symbol] += gain;
+}
+
+// Return the position of `symbol` in `entriesList`, or entriesList.size() if
+// it is absent. Only the first `maxSize` positions are ever considered.
+uint32_t TableJumpSection::getEntry(
+    const Symbol *symbol, uint32_t maxSize,
+    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+        &entriesList) {
+  const uint32_t limit = std::min<uint32_t>(entriesList.size(), maxSize);
+  for (uint32_t i = 0; i < limit; ++i)
+    if (symbol == entriesList[i].first)
+      return i;
+  return entriesList.size();
+}
+
+// Record every relaxable jump/call in `sec` as a jump-table candidate, weighted
+// by the bytes a cm.jt/cm.jalt would save over the current encoding.
+void TableJumpSection::scanTableJumpEntrys(const InputSection &sec) const {
+  // Without relaxAux there are no relocTypes to consult for this section.
+  if (!sec.relaxAux)
+    return;
+  for (auto [i, r] : llvm::enumerate(sec.relocs())) {
+    if (!isa<Defined>(r.sym))
+      continue;
+    if (i + 1 == sec.relocs().size() ||
+        sec.relocs()[i + 1].type != R_RISCV_RELAX)
+      continue;
+    if (r.type != R_RISCV_JAL && r.type != R_RISCV_CALL &&
+        r.type != R_RISCV_CALL_PLT)
+      continue;
+    // rd sits in the jalr of an auipc+jalr pair (offset 4) or in a lone jal.
+    const uint32_t insn = read32le(sec.contentMaybeDecompress().data() +
+                                   r.offset + (r.type == R_RISCV_JAL ? 0 : 4));
+    const uint32_t rd = extractBits(insn, 11, 7);
+
+    // Bytes saved: 8 -> 2 for an unrelaxed pair, 4 -> 2 for a jal, and
+    // nothing if the site already shrank to a 2-byte c.j/c.jal.
+    const RelType curType = sec.relaxAux->relocTypes[i];
+    if (curType == R_RISCV_RVC_JUMP)
+      continue;
+    const int gain = (r.type == R_RISCV_JAL || curType == R_RISCV_JAL) ? 2 : 6;
+
+    if (rd == 0)
+      in.riscvTableJumpSection->addCMJTEntryCandidate(r.sym, gain);
+    else if (rd == X_RA)
+      in.riscvTableJumpSection->addCMJALTEntryCandidate(r.sym, gain);
+  }
+}
+
+// Freeze both candidate maps into the final table contents (first call only).
+// cm.jalt slots start at index 32, so using them costs the 32 cm.jt slots too.
+void TableJumpSection::finalizeContents() {
+  if (isFinalized)
+    return;
+  isFinalized = true;
+
+  finalizedCMJTEntries = finalizeEntry(CMJTEntryCandidates, maxCMJTEntrySize);
+  finalizedCMJALTEntries =
+      finalizeEntry(CMJALTEntryCandidates, maxCMJALTEntrySize);
+  CMJTEntryCandidates.clear();
+  CMJALTEntryCandidates.clear();
+
+  if (!finalizedCMJALTEntries.empty()) {
+    int gainRequired = maxCMJTEntrySize * config->wordsize;
+    for (const auto &entry : finalizedCMJTEntries) {
+      gainRequired -= entry.second;
+    }
+    if (gainRequired > 0) {
+      for (const auto &entry : finalizedCMJALTEntries) {
+        gainRequired -= entry.second - (int)config->wordsize;
+        if (gainRequired <= 0)
+          break;
+      }
+    }
+
+    // Drop cm.jalt altogether if the table would still be a net size loss.
+    if (gainRequired > 0)
+      finalizedCMJALTEntries.clear();
+  }
+}
+
+// Turn a candidate map into table contents: order by descending gain, keep at
+// most `maxSize` entries, and drop those that save less than a slot costs.
+SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+TableJumpSection::finalizeEntry(llvm::DenseMap<const Symbol *, int> EntryMap,
+                                uint32_t maxSize) {
+  SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+      finalizedVector;
+  std::copy(EntryMap.begin(), EntryMap.end(),
+            std::back_inserter(finalizedVector));
+  // DenseMap order follows pointer values, so break gain ties by symbol name
+  // to keep the layout reproducible (same-named symbols can still tie).
+  llvm::sort(finalizedVector, [](const auto &p1, const auto &p2) {
+    if (p1.second != p2.second)
+      return p1.second > p2.second;
+    return p1.first->getName() < p2.first->getName();
+  });
+  if (finalizedVector.size() > maxSize)
+    finalizedVector.erase(finalizedVector.begin() + maxSize,
+                          finalizedVector.end());
+
+  // Drop trailing items that have a negative effect: a table slot costs
+  // wordsize bytes, so a smaller gain is a net loss.
+  while (!finalizedVector.empty() &&
+         finalizedVector.back().second < (int)config->wordsize)
+    finalizedVector.pop_back();
+  return finalizedVector;
+}
+
+// Bytes needed; the cm.jt slots below index 32 count whenever cm.jalt is used.
+size_t TableJumpSection::getSize() const {
+  if (isFinalized) {
+    if (!finalizedCMJALTEntries.empty())
+      return (startCMJALTEntryIdx + finalizedCMJALTEntries.size()) *
+             config->wordsize;
+    return (startCMJTEntryIdx + finalizedCMJTEntries.size()) * config->wordsize;
+  } else {
+    if (!CMJALTEntryCandidates.empty())
+      return (startCMJALTEntryIdx + CMJALTEntryCandidates.size()) *
+             config->wordsize;
+    return (startCMJTEntryIdx + CMJTEntryCandidates.size()) * config->wordsize;
+  }
+}
+
+// Emit the table: cm.jt targets from slot 0, then -- only when cm.jalt targets
+// exist -- zero padding up to slot 32 followed by the cm.jalt targets.
+void TableJumpSection::writeTo(uint8_t *buf) {
+  // No header is written: slot 0 must hold the first cm.jt target, and the
+  // table may be empty (getSize() == 0).
+  writeEntries(buf + startCMJTEntryIdx * config->wordsize,
+               finalizedCMJTEntries);
+  // getSize() stops after the cm.jt entries when there is nothing to put in
+  // the cm.jalt half, so padding then would write past the section.
+  if (finalizedCMJALTEntries.empty())
+    return;
+  padWords(buf + ((startCMJTEntryIdx + finalizedCMJTEntries.size()) *
+                  config->wordsize),
+           startCMJALTEntryIdx - startCMJTEntryIdx -
+               finalizedCMJTEntries.size());
+  writeEntries(buf + (startCMJALTEntryIdx * config->wordsize),
+               finalizedCMJALTEntries);
+}
+
+// Zero `maxWordCount` consecutive table slots starting at buf, advancing one
+// slot per iteration so the whole gap is cleared.
+void TableJumpSection::padWords(uint8_t *buf, const uint8_t maxWordCount) {
+  for (size_t i = 0; i < maxWordCount; ++i, buf += config->wordsize) {
+    if (config->is64)
+      write64le(buf, 0);
+    else
+      write32le(buf, 0);
+  }
+}
+
+// Write one slot per entry, in order, so that slot k always belongs to
+// entriesList[k] -- the index getEntry() handed to the relaxed instruction.
+void TableJumpSection::writeEntries(
+    uint8_t *buf,
+    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+        &entriesList) {
+  for (const auto &entry : entriesList) {
+    // An entry that cannot be resolved still occupies its slot (as zero);
+    // skipping it without advancing would shift every later target.
+    const bool usable = entry.second != 0 && entry.first->isDefined();
+    target->writeTableJump(buf, usable ? entry.first->getVA() : 0);
+    buf += config->wordsize;
+  }
+}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -257,6 +257,7 @@
   bool relocatable;
   bool relrGlibc = false;
   bool relrPackDynRelocs = false;
+  bool riscvTbljal;
   llvm::DenseSet<llvm::CachedHashStringRef> saveTempsArgs;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
   bool singleRoRx;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1298,6 +1298,7 @@
   config->whyExtract = args.getLastArgValue(OPT_why_extract);
   config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
+  config->riscvTbljal = args.hasArg(OPT_riscv_tbljal);
   config->zForceBti = hasZOption(args, "force-bti");
   config->zForceIbt = hasZOption(args, "force-ibt");
   config->zGlobal = hasZOption(args, "global");
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -401,7 +401,7 @@
   template <class ELFT> void copyShtGroup(uint8_t *buf);
 };
 
-static_assert(sizeof(InputSection) <= 152, "InputSection is too big");
+static_assert(sizeof(InputSection) <= 160, "InputSection is too big");
 
 class SyntheticSection : public InputSection {
 public:
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -320,6 +320,9 @@
     "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
     "Use SHT_RELR / DT_RELR* tags (default)">;
 
+def riscv_tbljal: FF<"riscv-tbljal">,
+  HelpText<"(RISC-V only) Enable table jump instructions from the Zcmt extension">;
+
 def pic_veneer: F<"pic-veneer">,
   HelpText<"Always generate position independent thunks (veneers)">;
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -366,6 +366,49 @@
   SmallVector<const Symbol *, 0> entries;
 };
 
+class TableJumpSection final : public SyntheticSection { // .riscv.jvt
+public:
+  TableJumpSection();
+  size_t getSize() const override;
+  void writeTo(uint8_t *buf) override;
+  void finalizeContents() override;
+  // Candidate collection (before finalizing) and slot lookup (after).
+  void addCMJTEntryCandidate(const Symbol *symbol, int gain);
+  int getCMJTEntryIndex(const Symbol *symbol);
+  void addCMJALTEntryCandidate(const Symbol *symbol, int gain);
+  int getCMJALTEntryIndex(const Symbol *symbol);
+  void scanTableJumpEntrys(const InputSection &sec) const;
+
+  bool isFinalized = false; // set by finalizeContents()
+
+private:
+  SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+  finalizeEntry(llvm::DenseMap<const Symbol *, int> EntryMap, uint32_t maxSize);
+  void addEntry(const Symbol *symbol,
+                llvm::DenseMap<const Symbol *, int> &entriesList, int gain);
+  uint32_t getEntry(const Symbol *symbol, uint32_t maxSize,
+                    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>,
+                                0> &entriesList);
+  void writeEntries(uint8_t *buf,
+                    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>,
+                                0> &entriesList);
+  void padWords(uint8_t *buf, const uint8_t maxWordCount);
+
+  // Table layout: cm.jt indexes slots 0-31, cm.jalt indexes slots 32-255.
+  static const size_t maxCMJTEntrySize = 32;
+  static const size_t maxCMJALTEntrySize = 224;
+
+  static const size_t startCMJTEntryIdx = 0;
+  static const size_t startCMJALTEntryIdx = 32;
+
+  llvm::DenseMap<const Symbol *, int> CMJTEntryCandidates;
+  SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+      finalizedCMJTEntries;
+  llvm::DenseMap<const Symbol *, int> CMJALTEntryCandidates;
+  SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+      finalizedCMJALTEntries;
+};
+
 // The IgotPltSection is a Got associated with the PltSection for GNU Ifunc
 // Symbols that will be relocated by Target->IRelativeRel.
 // On most Targets the IgotPltSection will immediately follow the GotPltSection
@@ -1280,6 +1323,7 @@
   std::unique_ptr<GotPltSection> gotPlt;
   std::unique_ptr<IgotPltSection> igotPlt;
   std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
+  std::unique_ptr<TableJumpSection> riscvTableJumpSection;
   std::unique_ptr<SyntheticSection> mipsAbiFlags;
   std::unique_ptr<MipsGotSection> mipsGot;
   std::unique_ptr<SyntheticSection> mipsOptions;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -39,7 +39,6 @@
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/TimeProfiler.h"
-#include <cstdlib>
 
 using namespace llvm;
 using namespace llvm::dwarf;
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -34,6 +34,8 @@
   virtual void writeGotPltHeader(uint8_t *buf) const {}
   virtual void writeGotHeader(uint8_t *buf) const {}
   virtual void writeGotPlt(uint8_t *buf, const Symbol &s) const {};
+  virtual void writeTableJumpHeader(uint8_t *buf) const {}
+  virtual void writeTableJump(uint8_t *buf, const uint64_t symbol) const {}
   virtual void writeIgotPlt(uint8_t *buf, const Symbol &s) const {}
   virtual int64_t getImplicitAddend(const uint8_t *buf, RelType type) const;
   virtual int getTlsGdRelaxSkip(RelType type) const { return 1; }
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -449,6 +449,15 @@
     add(*in.ppc64LongBranchTarget);
   }
 
+  if (config->emachine == EM_RISCV && config->riscvTbljal) {
+    in.riscvTableJumpSection = std::make_unique<TableJumpSection>();
+    add(*in.riscvTableJumpSection);
+    // __jvt_base$ marks the start of the table (value 0 within .riscv.jvt).
+    symtab.addSymbol(Defined{
+        /*file=*/nullptr, "__jvt_base$", STB_GLOBAL, STV_DEFAULT, STT_NOTYPE,
+        /*value=*/0, /*size=*/0, in.riscvTableJumpSection.get()});
+  }
+
   in.gotPlt = std::make_unique<GotPltSection>();
   add(*in.gotPlt);
   in.igotPlt = std::make_unique<IgotPltSection>();
@@ -1664,6 +1673,19 @@
       script->assignAddresses();
       changed |= a32p.createFixes();
     }
+    // Once ordinary relaxation has converged, collect Zcmt jump-table
+    // candidates a single time, freeze the table, and relax again so that
+    // relaxZcmt() can use the finalized slots.
+    if (config->riscvTbljal && !changed && in.riscvTableJumpSection &&
+        !in.riscvTableJumpSection->isFinalized) {
+      // ctx.inputSections may hold section kinds other than InputSection;
+      // cast<> would assert on them, so filter instead.
+      for (InputSectionBase *inputSection : ctx.inputSections)
+        if (auto *sec = dyn_cast<InputSection>(inputSection))
+          in.riscvTableJumpSection->scanTableJumpEntrys(*sec);
+      in.riscvTableJumpSection->finalizeContents();
+      changed |= target->relaxOnce(pass);
+    }
 
     if (in.mipsGot)
       in.mipsGot->updateAllocSize();
diff --git a/lld/test/ELF/riscv-tbljal-call.s b/lld/test/ELF/riscv-tbljal-call.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-call.s
@@ -0,0 +1,88 @@
+# REQUIRES: riscv
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv64.o
+
+# tbljal conversion
+# RUN: ld.lld %t.rv32.o --riscv-tbljal --defsym foo=_start+0x150000 -o %t.rv32
+# RUN: ld.lld %t.rv64.o --riscv-tbljal --defsym foo=_start+0x150000 -o %t.rv64
+# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=TBLJAL32 %s
+# RUN: llvm-objdump -d -M no-aliases --mattr=+experimental-zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=TBLJAL64 %s
+# TBLJAL32:      cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jalt 32
+# TBLJAL32-NEXT: cm.jt   2
+# TBLJAL32-NEXT: cm.jt   2
+# TBLJAL32-NEXT: cm.jt   1
+# TBLJAL32-NEXT: cm.jt   1
+# TBLJAL32-NEXT: cm.jt   1
+# TBLJAL32-NEXT: cm.jt   0
+# TBLJAL32-NEXT: cm.jt   0
+# TBLJAL32-NEXT: cm.jt   0
+# TBLJAL32-NEXT: cm.jt   0
+
+# TBLJAL64:      cm.jt   0
+# TBLJAL64-NEXT: cm.jt   0
+# TBLJAL64-NEXT: cm.jt   0
+# TBLJAL64-NEXT: cm.jt   0
+
+
+.global _start
+.p2align 3
+_start:
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  call foo
+  tail foo_1
+  tail foo_1
+  tail foo_2
+  tail foo_2
+  tail foo_2
+  tail foo_3
+  tail foo_3
+  tail foo_3
+  tail foo_3
+
+foo_1:
+  nop
+
+foo_2:
+  nop
+
+foo_3:
+  nop
+
diff --git a/lld/test/ELF/riscv-tbljal-syms.s b/lld/test/ELF/riscv-tbljal-syms.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-syms.s
@@ -0,0 +1,32 @@
+# REQUIRES: riscv
+
+// Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax -mattr=+experimental-zcmt %s -o %t.rv64.o
+# RUN: ld.lld -Ttext=0x100000 --riscv-tbljal %t.rv32.o -o %t.rv32
+# RUN: ld.lld -Ttext=0x100000 --riscv-tbljal %t.rv64.o -o %t.rv64
+
+# RUN: llvm-readelf -s %t.rv32 | FileCheck %s
+# RUN: llvm-readelf -s %t.rv64 | FileCheck %s
+
+# CHECK: 00100000     4 NOTYPE  LOCAL  DEFAULT     1 a
+# CHECK: 00100000     8 NOTYPE  LOCAL  DEFAULT     1 b
+# CHECK: 00100004     4 NOTYPE  LOCAL  DEFAULT     1 c
+# CHECK: 00100004     8 NOTYPE  LOCAL  DEFAULT     1 d
+# CHECK: 00100000    12 NOTYPE  GLOBAL DEFAULT     1 _start
+
+.global _start
+_start:
+a:
+b:
+  add  a0, a1, a2
+.size a, . - a
+c:
+d:
+  call _start
+.size b, . - b
+.size c, . - c
+  add  a0, a1, a2
+.size d, . - d
+.size _start, . - _start