Index: lld/ELF/Arch/X86_64.cpp
===================================================================
--- lld/ELF/Arch/X86_64.cpp
+++ lld/ELF/Arch/X86_64.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "InputFiles.h"
+#include "OutputSections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -37,6 +38,8 @@
                 uint64_t pltEntryAddr) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  void applyJumpInstrMod(uint8_t *Loc, JumpModType Type,
+                         unsigned size) const override;

   RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
                           RelExpr expr) const override;
@@ -52,9 +55,22 @@
                         uint64_t val) const override;
   bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                         uint8_t stOther) const override;
+  bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+                             InputSection *nextIS) const override;
 };
 } // namespace

+// NOP byte sequences ordered by length: entry N-1 is exactly N bytes long
+// (1 through 9 bytes). The padding code looks entries up by "length - 1", so
+// the table must stay dense and sorted by size.
+static const std::vector<std::vector<uint8_t>> X86_NOP_INSTRUCTIONS = {
+    {0x90},
+    {0x66, 0x90},
+    {0x0f, 0x1f, 0x00},
+    {0x0f, 0x1f, 0x40, 0x00},
+    {0x0f, 0x1f, 0x44, 0x00, 0x00},
+    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};
+
 X86_64::X86_64() {
   copyRel = R_X86_64_COPY;
   gotRel = R_X86_64_GLOB_DAT;
@@ -71,6 +87,7 @@
   pltEntrySize = 16;
   ipltEntrySize = 16;
   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
+  nopInstrs = X86_NOP_INSTRUCTIONS;

   // Align to the large page size (known as a superpage or huge page).
   // FreeBSD automatically promotes large, superpage-aligned allocations.
@@ -79,6 +96,201 @@

 int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }

+// Opcodes for the different X86_64 jmp instructions.
+enum JmpInsnOpcode : uint32_t {
+  J_JMP_32,
+  J_JNE_32,
+  J_JE_32,
+  J_JG_32,
+  J_JGE_32,
+  J_JB_32,
+  J_JBE_32,
+  J_JL_32,
+  J_JLE_32,
+  J_JA_32,
+  J_JAE_32,
+  J_UNKNOWN,
+};
+
+// Classifies a jump from its opcode bytes. `second` is the byte directly in
+// front of the 32-bit displacement; `first` is the byte before that and may
+// be null when no such byte is available. Anything that is neither
+// "e9" nor a recognised "0f 8x" pair maps to J_UNKNOWN.
+static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
+                                    const uint8_t *second) {
+  const uint8_t opcode = *second;
+  if (opcode == 0xe9)
+    return J_JMP_32;
+
+  // Every conditional form handled here carries the 0x0f escape byte.
+  if (!first || *first != 0x0f)
+    return J_UNKNOWN;
+
+  switch (opcode) {
+  case 0x82:
+    return J_JB_32;
+  case 0x83:
+    return J_JAE_32;
+  case 0x84:
+    return J_JE_32;
+  case 0x85:
+    return J_JNE_32;
+  case 0x86:
+    return J_JBE_32;
+  case 0x87:
+    return J_JA_32;
+  case 0x8c:
+    return J_JL_32;
+  case 0x8d:
+    return J_JGE_32;
+  case 0x8e:
+    return J_JLE_32;
+  case 0x8f:
+    return J_JG_32;
+  default:
+    return J_UNKNOWN;
+  }
+}
+
+// Looks for the first relocation of `is` that is still live (expr is not
+// R_NONE) and sits at exactly `offset`. Yields its index, or
+// is.relocations.size() when there is none.
+static unsigned getRelocationWithOffset(const InputSection &is,
+                                        uint64_t offset) {
+  const unsigned count = is.relocations.size();
+  for (unsigned idx = 0; idx != count; ++idx) {
+    const Relocation &rel = is.relocations[idx];
+    if (rel.offset == offset && rel.expr != R_NONE)
+      return idx;
+  }
+  return count;
+}
+
+// True for the relocation types this pass accepts on a jump displacement.
+static bool isRelocationForJmpInsn(Relocation &R) {
+  switch (R.type) {
+  case R_X86_64_PLT32:
+  case R_X86_64_PC32:
+  case R_X86_64_PC8:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// True when the byte at `opcode` is 0xe9, the direct jmp with a 32-bit
+// displacement.
+static bool isDirectJmpInsnOpcode(const uint8_t *opcode) {
+  return opcode[0] == 0xe9;
+}
+
+// Return true if Relocation R points to the first instruction in the
+// next section.
+// TODO: Delete this once a new relocation is added for this.
+static bool isFallThruRelocation(InputSection &is, InputFile *file,
+                                 InputSection *nextIS, Relocation &r) {
+  if (!isRelocationForJmpInsn(r))
+    return false;
+
+  // Output address of the displacement field the relocation patches.
+  uint64_t addrLoc = (is.getOutputSection())->addr + is.outSecOff + r.offset;
+  uint64_t targetOffset =
+      SignExtend64(InputSectionBase::getRelocTargetVA(file, r.type, r.addend,
+                                                      addrLoc, *r.sym, r.expr),
+                   (config->wordsize * 8));
+
+  // If this jmp is a fall thru, the target offset is the beginning of the
+  // next section.
+  uint64_t NextSectionOffset =
+      nextIS->getOutputSection()->addr + nextIS->outSecOff;
+  // The displacement is relative to the end of the 4-byte field, hence "+ 4".
+  return ((addrLoc + 4 + targetOffset) == NextSectionOffset);
+}
+
+// Return the jmp instruction opcode that is the inverse of the given
+// opcode.  For example, JE inverted is JNE. Opcodes without an inverse
+// (the unconditional jmp and J_UNKNOWN) yield J_UNKNOWN.
+static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
+  switch (opcode) {
+  case J_JE_32:
+    return J_JNE_32;
+  case J_JNE_32:
+    return J_JE_32;
+  case J_JG_32:
+    return J_JLE_32;
+  case J_JGE_32:
+    return J_JL_32;
+  case J_JB_32:
+    return J_JAE_32;
+  case J_JBE_32:
+    return J_JA_32;
+  case J_JL_32:
+    return J_JGE_32;
+  case J_JLE_32:
+    return J_JG_32;
+  case J_JA_32:
+    return J_JBE_32;
+  case J_JAE_32:
+    return J_JB_32;
+  default:
+    return J_UNKNOWN;
+  }
+}
+
+// Deletes direct jump instruction in input sections that jumps to the
+// following section as it is not required.  If there are two consecutive jump
+// instructions, it checks if they can be flipped and one can be deleted.
+//
+// Returns true when the trailing 5-byte jmp of `is` was dropped. In that case
+// the jmp's relocation is cancelled (expr set to R_NONE), the section is
+// marked for NOP padding, and, for the flip case, a JumpInstrMod is recorded
+// so the conditional jump's opcode is rewritten when the section is written.
+bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+                                   InputSection *nextIS) const {
+  const unsigned sizeOfDirectJmpInsn = 5;
+
+  if (nextIS == nullptr)
+    return false;
+
+  if (is.getSize() < sizeOfDirectJmpInsn)
+    return false;
+
+  // If this jmp insn can be removed, it is the last insn and the
+  // relocation is 4 bytes before the end.
+  unsigned rIndex = getRelocationWithOffset(is, (is.getSize() - 4));
+  if (rIndex == is.relocations.size())
+    return false;
+
+  Relocation &r = is.relocations[rIndex];
+
+  // Check if the relocation corresponds to a direct jmp.
+  const uint8_t *secContents = is.data().data();
+  if (!isDirectJmpInsnOpcode(secContents + r.offset - 1))
+    return false;
+
+  if (isFallThruRelocation(is, file, nextIS, r)) {
+    // This is a fall thru and can be deleted.
+    r.expr = R_NONE;
+    r.offset = 0;
+    is.drop_back(sizeOfDirectJmpInsn);
+    is.nopFiller = true;
+    return true;
+  }
+
+  // Now, check if flip and delete is possible.
+  const unsigned sizeOfJmpCCInsn = 6;
+  // To flip, there must be at least one JmpCC and one direct jmp.
+  if (is.getSize() < (sizeOfDirectJmpInsn + sizeOfJmpCCInsn))
+    return false;
+
+  // The JmpCC's displacement ends where the direct jmp starts.
+  unsigned rbIndex =
+      getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
+  if (rbIndex == is.relocations.size())
+    return false;
+
+  Relocation &rB = is.relocations[rbIndex];
+
+  // jmpInsnB is the second opcode byte; the byte before it is the 0x0f escape.
+  const uint8_t *jmpInsnB = secContents + rB.offset - 1;
+  JmpInsnOpcode jmpOpcode_B = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
+  if (jmpOpcode_B == J_UNKNOWN)
+    return false;
+
+  if (!isFallThruRelocation(is, file, nextIS, rB))
+    return false;
+
+  // jmpCC jumps to the fall thru block, the branch can be flipped and the
+  // jmp can be deleted.
+  JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcode_B);
+  if (jInvert == J_UNKNOWN)
+    return false;
+  is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4});
+  // Move R's values to rB except the offset.
+  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
+  // Cancel R
+  r.expr = R_NONE;
+  r.offset = 0;
+  is.drop_back(sizeOfDirectJmpInsn);
+  is.nopFiller = true;
+  return true;
+}
+
 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
   if (type == R_X86_64_GOTTPOFF)
@@ -357,6 +569,90 @@
         "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
 }

+// Returns the low opcode nibble shared by the two encodings of a conditional
+// jump (0x70 | cc with an 8-bit displacement, 0x0f 0x80 | cc with a 32-bit
+// one), or -1 if `type` is not one of the conditional jump kinds.
+static int getJmpConditionCode(JumpModType type) {
+  switch (type) {
+  case J_JB_32:
+    return 0x2;
+  case J_JAE_32:
+    return 0x3;
+  case J_JE_32:
+    return 0x4;
+  case J_JNE_32:
+    return 0x5;
+  case J_JBE_32:
+    return 0x6;
+  case J_JA_32:
+    return 0x7;
+  case J_JL_32:
+    return 0xc;
+  case J_JGE_32:
+    return 0xd;
+  case J_JLE_32:
+    return 0xe;
+  case J_JG_32:
+    return 0xf;
+  default:
+    return -1;
+  }
+}
+
+// Rewrites the opcode of the jump whose last opcode byte is at `loc` so that
+// it becomes a jump of kind `type`. A `size` of 4 selects the encoding with a
+// 32-bit displacement (for conditional jumps this also writes the 0x0f escape
+// byte at loc[-1]); any other size selects the short encoding.
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
+                               unsigned size) const {
+  if (type == J_UNKNOWN)
+    llvm_unreachable("Unknown Jump Relocation");
+
+  if (type == J_JMP_32) {
+    *loc = size == 4 ? 0xe9 : 0xeb;
+    return;
+  }
+
+  int cc = getJmpConditionCode(type);
+  if (cc < 0)
+    return; // Not a jump kind this target knows about; leave bytes untouched.
+
+  if (size == 4) {
+    loc[-1] = 0x0f;
+    *loc = 0x80 | cc;
+  } else {
+    *loc = 0x70 | cc;
+  }
+}
+
 void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   switch (rel.type) {
   case R_X86_64_8:
Index: lld/ELF/Config.h
===================================================================
--- lld/ELF/Config.h
+++ lld/ELF/Config.h
@@ -113,6 +113,7 @@
   llvm::StringRef sysroot;
   llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg; + llvm::StringRef ltoBBSections; std::pair thinLTOObjectSuffixReplace; std::pair thinLTOPrefixReplace; std::string rpath; @@ -165,6 +166,7 @@ bool ltoCSProfileGenerate; bool ltoDebugPassManager; bool ltoNewPassManager; + bool ltoUniqueBBSectionNames; bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; @@ -175,6 +177,7 @@ bool nostdlib; bool oFormatBinary; bool omagic; + bool optimizeBBJumps; bool optRemarksWithHotness; bool pacPlt; bool picThunk; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -882,6 +882,9 @@ config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false); config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common, !args.hasArg(OPT_relocatable)); + config->optimizeBBJumps = + args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false); + config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); @@ -929,6 +932,10 @@ config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBBSections = args.getLastArgValue(OPT_lto_basicblock_sections); + config->ltoUniqueBBSectionNames = + args.hasFlag(OPT_lto_unique_bb_section_names, + OPT_no_lto_unique_bb_section_names, false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -128,6 +128,30 @@ return cast_or_null>(file); } + // If basic block sections are enabled, 
many code sections could end up with + // one or two jump instructions at the end that could be relaxed to a smaller + // instruction. The members below help trimming the trailing jump instruction + // and shrinking a section. + unsigned bytesDropped = 0; + + bool trimmed = false; + + void drop_back(uint64_t num) { bytesDropped += num; } + + void push_back(uint64_t num) { + assert(bytesDropped >= num); + bytesDropped -= num; + } + + void trim() { + if (trimmed) + return; + if (bytesDropped) { + rawData = rawData.drop_back(bytesDropped); + trimmed = true; + } + } + ArrayRef data() const { if (uncompressedSize >= 0) uncompress(); @@ -183,12 +207,25 @@ // the mmap'ed output buffer. template void relocate(uint8_t *buf, uint8_t *bufEnd); void relocateAlloc(uint8_t *buf, uint8_t *bufEnd); + static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, + int64_t A, uint64_t P, const Symbol &Sym, + RelExpr Expr); // The native ELF reloc data type is not very convenient to handle. // So we convert ELF reloc records to our own records in Relocations.cpp. // This vector contains such "cooked" relocations. std::vector relocations; + // Indicates that this section needs to be padded with a NOP filler if set to + // true. + bool nopFiller = false; + + // These are modifiers to jump instructions that are necessary when basic + // block sections are enabled. Basic block sections creates opportunities to + // relax jump instructions at basic block boundaries after reordering the + // basic blocks. + std::vector jumpInstrMods; + // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find // such functions and adjust their prologues. 
This is very similar Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -138,7 +138,10 @@ return s->getSize(); if (uncompressedSize >= 0) return uncompressedSize; - return rawData.size(); + if (trimmed) + return rawData.size(); + else + return rawData.size() - bytesDropped; } void InputSectionBase::uncompress() const { @@ -654,8 +657,9 @@ } } -static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, - uint64_t p, const Symbol &sym, RelExpr expr) { +uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + int64_t a, uint64_t p, + const Symbol &sym, RelExpr expr) { switch (expr) { case R_ABS: case R_DTPREL: @@ -862,6 +866,12 @@ if (expr == R_NONE) continue; + if (expr == R_SIZE) { + target->relocateNoSym(bufLoc, type, + SignExtend64(sym.getSize() + addend)); + continue; + } + if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { std::string msg = getLocation(offset) + ": has non-ABS relocation " + toString(type) + @@ -933,6 +943,8 @@ const unsigned bits = config->wordsize * 8; for (const Relocation &rel : relocations) { + if (rel.expr == R_NONE) + continue; uint64_t offset = rel.offset; if (auto *sec = dyn_cast(this)) offset += sec->outSecOff; @@ -1002,6 +1014,19 @@ break; } } + + // Apply jumpInstrMods. jumpInstrMods are created when the opcode of + // a jmp insn must be modified to shrink the jmp insn or to flip the jmp + // insn. This is primarily used to relax and optimize jumps created with + // basic block sections. 
+ if (auto *sec = dyn_cast(this)) { + for (const JumpInstrMod &jumpMod : jumpInstrMods) { + uint64_t offset = jumpMod.Offset; + offset += sec->outSecOff; + uint8_t *bufLoc = buf + offset; + target->applyJumpInstrMod(bufLoc, jumpMod.Original, jumpMod.Size); + } + } } // For each function-defining prologue, find any calls to __morestack, Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -27,6 +27,7 @@ #include "llvm/LTO/Config.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/ProfileData/BBSectionsProf.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -76,6 +77,23 @@ c.Options.FunctionSections = true; c.Options.DataSections = true; + // Check if basic block sections must be used. + if (!config->ltoBBSections.empty()) { + if (config->ltoBBSections == "all") + c.Options.BBSections = BasicBlockSection::All; + else if (config->ltoBBSections == "labels") + c.Options.BBSections = BasicBlockSection::Labels; + else if (config->ltoBBSections == "none") + c.Options.BBSections = BasicBlockSection::None; + else { + llvm::bbsections::getBBSectionsList(config->ltoBBSections, + c.Options.BBSectionsList); + c.Options.BBSections = BasicBlockSection::List; + } + } + + c.Options.UniqueBBSectionNames = config->ltoUniqueBBSectionNames; + if (auto relocModel = getRelocModelFromCMModel()) c.RelocModel = *relocModel; else if (config->relocatable) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -42,6 +42,10 @@ defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"=">; +defm optimize_bb_jumps: B<"optimize-bb-jumps", + "Remove direct jumps at the end to the next basic block", + "Do not remove any direct jumps at the end to the next basic block">; + defm split_stack_adjust_size : Eq<"split-stack-adjust-size", "Specify 
adjustment to stack size when a split-stack function calls a "
@@ -499,6 +503,11 @@
   HelpText<"The format used for serializing remarks (default: YAML)">;
 defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">;
 def save_temps: F<"save-temps">;
+def lto_basicblock_sections: J<"lto-basicblock-sections=">,
+  HelpText<"Enable basic block sections for LTO">;
+defm lto_unique_bb_section_names: B<"lto-unique-bb-section-names",
+    "Give unique names to every basic block section for LTO",
+    "Do not give unique names to every basic block section for LTO">;
 def thinlto_cache_dir: J<"thinlto-cache-dir=">,
   HelpText<"Path to ThinLTO cached object file directory">;
 defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">;
Index: lld/ELF/OutputSections.cpp
===================================================================
--- lld/ELF/OutputSections.cpp
+++ lld/ELF/OutputSections.cpp
@@ -243,6 +243,22 @@
   sortByOrder(isd->sections, order);
 }

+// Fill [Buf, Buf + Size) with NOPs taken from target->nopInstrs, which the
+// caller must have checked to be set. The longest table entry is repeated as
+// often as it fits; the remaining tail is covered by the single entry whose
+// length equals the tail (entry N-1 is expected to be N bytes long).
+static void nopInstrFill(uint8_t *Buf, size_t Size) {
+  // Bind by reference: copying the whole table on every call is wasted work.
+  const auto &nopFiller = *target->nopInstrs;
+  const auto &longestNop = nopFiller.back();
+
+  size_t i = 0;
+  for (; Size - i >= longestNop.size(); i += longestNop.size())
+    memcpy(Buf + i, longestNop.data(), longestNop.size());
+
+  const size_t remaining = Size - i;
+  if (!remaining)
+    return;
+  // Refuse to pad if the table has no entry of exactly the remaining length.
+  if (remaining > nopFiller.size() ||
+      nopFiller[remaining - 1].size() != remaining)
+    fatal("failed padding with special filler");
+  memcpy(Buf + i, nopFiller[remaining - 1].data(), remaining);
+}
+
 // Fill [Buf, Buf + Size) with Filler.
 // This is used for linker script "=fillexp" command.
 static void fill(uint8_t *buf, size_t size,
@@ -331,7 +347,11 @@
         end = buf + size;
       else
         end = buf + sections[i + 1]->outSecOff;
-      fill(start, end - start, filler);
+      // Check if this IS needs a special filler.
+      if (isec->nopFiller && target->nopInstrs)
+        nopInstrFill(start, end - start);
+      else
+        fill(start, end - start, filler);
     }
   });

Index: lld/ELF/Relocations.h
===================================================================
--- lld/ELF/Relocations.h
+++ lld/ELF/Relocations.h
@@ -24,6 +24,7 @@

 // Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
 using RelType = uint32_t;
+using JumpModType = uint32_t;

 // List of target-independent relocation types. Relocations read
 // from files are converted to these types so that the main code
@@ -107,6 +108,15 @@
   Symbol *sym;
 };

+// Manipulate jump instructions with these modifiers.  These are used to relax
+// jump instruction opcodes at basic block boundaries and are particularly
+// useful when basic block sections are enabled.
+struct JumpInstrMod {
+  // Target-specific jump kind passed to TargetInfo::applyJumpInstrMod as the
+  // opcode to write.
+  JumpModType Original;
+  // Offset, within the input section, of the opcode byte to rewrite.
+  uint64_t Offset;
+  // Size in bytes of the jump's displacement; selects the encoding.
+  unsigned Size;
+};
+
 // This function writes undefined symbol diagnostics to an internal buffer.
 // Call reportUndefinedSymbols() after calling scanRelocations() to emit
 // the diagnostics.
Index: lld/ELF/Target.h
===================================================================
--- lld/ELF/Target.h
+++ lld/ELF/Target.h
@@ -88,8 +88,21 @@
     relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val);
   }

+  // Rewrites the jump opcode at Loc to the kind given by Type, using the
+  // encoding that matches a displacement of Size bytes. The default
+  // implementation does nothing.
+  virtual void applyJumpInstrMod(uint8_t *Loc, JumpModType Type,
+                                 unsigned Size) const {}
+
   virtual ~TargetInfo();

+  // This deletes a jump insn at the end of the section if it is a fall thru to
+  // the next section.  Further, if there is a conditional jump and a direct
+  // jump consecutively, it tries to flip the conditional jump to convert the
+  // direct jump into a fall thru and delete it.  Returns true if a jump
+  // instruction can be deleted.
+  virtual bool deleteFallThruJmpInsn(InputSection &IS, InputFile *File,
+                                     InputSection *NextIS) const {
+    return false;
+  }
+
   unsigned defaultCommonPageSize = 4096;
   unsigned defaultMaxPageSize = 4096;

@@ -126,6 +139,10 @@
   // executable OutputSections.
   std::array<uint8_t, 4> trapInstr;

+  // Stores the NOP instructions of different sizes for the target and is used
+  // to pad sections that are relaxed.
+  llvm::Optional<std::vector<std::vector<uint8_t>>> nopInstrs;
+
   // If a target needs to rewrite calls to __morestack to instead call
   // __morestack_non_split when a split-stack enabled caller calls a
   // non-split-stack callee this will return true. Otherwise returns false.
Index: lld/ELF/Writer.cpp
===================================================================
--- lld/ELF/Writer.cpp
+++ lld/ELF/Writer.cpp
@@ -30,6 +30,8 @@
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include <climits>
+
+#define DEBUG_TYPE "lld"

 using namespace llvm;
 using namespace llvm::ELF;
@@ -57,6 +59,7 @@
   void sortSections();
   void resolveShfLinkOrder();
   void finalizeAddressDependentContent();
+  void optimizeBasicBlockJumps();
   void sortInputSections();
   void finalizeSections();
   void checkExecuteOnly();
@@ -1608,6 +1611,85 @@
   }
 }

+// If Input Sections have been shrunk (basic block sections) then
+// update symbol values and sizes associated with these sections.
+static void fixSymbolsAfterShrinking() {
+  for (InputFile *File : objectFiles) {
+    parallelForEach(File->getSymbols(), [&](Symbol *Sym) {
+      auto *def = dyn_cast<Defined>(Sym);
+      if (!def)
+        return;
+
+      const SectionBase *sec = def->section;
+      if (!sec)
+        return;
+
+      const auto *inputSec = dyn_cast<InputSection>(sec->repl);
+      if (!inputSec || !inputSec->bytesDropped)
+        return;
+
+      // getSize() already accounts for bytesDropped whether or not the
+      // section data has been physically trimmed; data().size() only does so
+      // after trim() has run.
+      const uint64_t NewSize = inputSec->getSize();
+
+      // The symbol starts past the new end of the section: pull it back.
+      if (def->value > NewSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Moving symbol " << Sym->getName() << " from "
+                   << def->value << " to "
+                   << def->value - inputSec->bytesDropped << " bytes\n");
+        def->value -= inputSec->bytesDropped;
+        return;
+      }
+
+      // The symbol starts inside the section but extends past its new end.
+      if (def->value + def->size > NewSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Shrinking symbol " << Sym->getName() << " from "
+                   << def->size << " to " << def->size - inputSec->bytesDropped
+                   << " bytes\n");
+        def->size -= inputSec->bytesDropped;
+      }
+    });
+  }
+}
+
+// If basic block sections exist, there are opportunities to delete fall thru
+// jumps and shrink jump instructions after basic block reordering.  This
+// relaxation pass does that.
+template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() {
+  if (!config->optimizeBBJumps || !ELFT::Is64Bits)
+    return;
+
+  // The target hook compares output addresses, so they must be assigned first.
+  script->assignAddresses();
+  // For every output section that has executable input sections, this
+  // deletes all direct jump instructions in input sections that jump to the
+  // following section as it is not required.  If there are two consecutive
+  // jump instructions, it checks if they can be flipped and one can be
+  // deleted.
+  for (OutputSection *os : outputSections) {
+    if (!(os->flags & SHF_EXECINSTR))
+      continue;
+    std::vector<InputSection *> sections = getInputSections(os);
+    // One slot per input section: 1 if a jump was deleted from it, else 0.
+    std::vector<unsigned> result(sections.size());
+    // Delete all fall through jump instructions.  Also, check if two
+    // consecutive jump instructions can be flipped so that a fall through jmp
+    // instruction can be deleted.
+    parallelForEachN(0, sections.size(), [&](size_t i) {
+      InputSection *next =
+          (i + 1) < sections.size() ? sections[i + 1] : nullptr;
+      InputSection &is = *sections[i];
+      result[i] =
+          target->deleteFallThruJmpInsn(is, is.getFile(), next) ? 1 : 0;
+    });
+    size_t numDeleted = std::count(result.begin(), result.end(), 1);
+    if (numDeleted > 0) {
+      // Sections shrank, so every following address has to be recomputed.
+      script->assignAddresses();
+      LLVM_DEBUG(llvm::dbgs()
+                 << "Removing " << numDeleted << " fall through jumps\n");
+    }
+  }
+
+  fixSymbolsAfterShrinking();
+}
+
 static void finalizeSynthetic(SyntheticSection *sec) {
   if (sec && sec->isNeeded() && sec->getParent())
     sec->finalizeContents();
@@ -1917,6 +1999,10 @@
   finalizeSynthetic(in.symTab);
   finalizeSynthetic(in.ppc64LongBranchTarget);

+  // Relaxation to delete inter-basic block jumps created by basic block
+  // sections.
+  optimizeBasicBlockJumps();
+
   // Fill other section headers. The dynamic table is finalized
   // at the end because some tags like RELSZ depend on result
   // of finalizing other sections.
Index: lld/test/ELF/bb-sections-delete-fallthru.s
===================================================================
--- /dev/null
+++ lld/test/ELF/bb-sections-delete-fallthru.s
@@ -0,0 +1,31 @@
+# REQUIRES: x86
+## basicblock-sections tests.
+## This simple test checks if redundant direct jumps are converted to
+## implicit fallthrus.  The jne must be converted to je and the direct
+## jmp must be deleted.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux  %s -o %t.o
+# RUN: ld.lld  -optimize-bb-jumps %t.o -o %t.out
+# RUN: llvm-objdump -d %t.out| FileCheck %s --check-prefix=CHECK
+
+# CHECK:      foo:
+# CHECK-NEXT:  nopl (%rax)
+# CHECK-NEXT:  {{[0-9|a-f| ]*}} je 3
+# CHECK-NOT:   jmp
+
+# CHECK:      a.BB.foo:
+
+.section	.text,"ax",@progbits
+# -- Begin function foo
+.type	foo,@function
+foo:
+ nopl (%rax)
+ jne	a.BB.foo
+ jmp	aa.BB.foo
+
+.section	.text,"ax",@progbits,unique,2
+a.BB.foo:
+ nopl (%rax)
+
+aa.BB.foo:
+ ret