Index: lld/ELF/Arch/X86_64.cpp =================================================================== --- lld/ELF/Arch/X86_64.cpp +++ lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -37,6 +38,8 @@ uint64_t pltEntryAddr) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + void relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type, + unsigned Size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; @@ -52,9 +55,22 @@ uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const override; }; } // namespace +static std::vector> X86_NOP_INSTRUCTIONS = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -79,6 +95,206 @@ int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode : uint32_t { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. 
+static JmpInsnOpcode getJmpInsnType(const uint8_t *First, + const uint8_t *Second) { + if (*Second == 0xe9) + return J_JMP_32; + + if (First == nullptr) + return J_UNKNOWN; + + if (*First == 0x0f) { + switch (*Second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +// Return the relocation index for input section IS with a specific Offset. +// Returns the maximum size of the vector if no such relocation is found. +static unsigned getRelocationWithOffset(const InputSection &IS, + uint64_t Offset) { + unsigned I = 0; + for (; I < IS.relocations.size(); ++I) { + if (IS.relocations[I].offset == Offset && IS.relocations[I].expr != R_NONE) + break; + } + return I; +} + +static bool isRelocationForJmpInsn(Relocation &R) { + return (R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || + R.type == R_X86_64_PC8); +} + +static bool isDirectJmpInsnOpcode(const uint8_t *Opcode) { + return (*Opcode == 0xe9); +} + +// Return true if Relocaction R points to the first instruction in the +// next section. +// TODO: Delete this once a new relocation is added for this. +static bool isFallThruRelocation(InputSection &IS, InputFile *File, + InputSection *NextIS, Relocation &R) { + if (!isRelocationForJmpInsn(R)) + return false; + + uint64_t AddrLoc = (IS.getOutputSection())->addr + IS.outSecOff + R.offset; + uint64_t TargetOffset = + SignExtend64(InputSectionBase::getRelocTargetVA(File, R.type, R.addend, + AddrLoc, *R.sym, R.expr), + (config->wordsize * 8)); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section. 
+ uint64_t NextSectionOffset = + NextIS->getOutputSection()->addr + NextIS->outSecOff; + if ((AddrLoc + 4 + TargetOffset) != NextSectionOffset) + return false; + + return true; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch (opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } + return J_UNKNOWN; +} + +// Deletes direct jump instruction in input sections that jumps to the +// following section as it is not required. If there are two consecutive jump +// instructions, it checks if they can be flipped and one can be deleted. +bool X86_64::deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const { + const unsigned SizeOfDirectJmpInsn = 5; + + if (NextIS == nullptr) + return false; + + if (IS.getSize() < SizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned RIndex = getRelocationWithOffset(IS, (IS.getSize() - 4)); + if (RIndex == IS.relocations.size()) + return false; + + Relocation &R = IS.relocations[RIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *SecContents = IS.data().data(); + if (!isDirectJmpInsnOpcode(SecContents + R.offset - 1)) + return false; + + if (isFallThruRelocation(IS, File, NextIS, R)) { + // This is a fall thru and can be deleted. 
+    R.expr = R_NONE;
+    R.offset = 0;
+    IS.drop_back(SizeOfDirectJmpInsn);
+    IS.SpecialFiller = X86_NOP_INSTRUCTIONS;
+    return true;
+  }
+
+  // Now, check if flip and delete is possible.
+  const unsigned SizeOfJmpCCInsn = 6;
+  // To flip, there must be at least one JmpCC and one direct jmp.
+  if (IS.getSize() < (SizeOfDirectJmpInsn + SizeOfJmpCCInsn))
+    return false;
+
+  unsigned RbIndex =
+      getRelocationWithOffset(IS, (IS.getSize() - SizeOfDirectJmpInsn - 4));
+  if (RbIndex == IS.relocations.size())
+    return false;
+
+  Relocation &Rb = IS.relocations[RbIndex];
+
+  const uint8_t *JmpInsnB = SecContents + Rb.offset - 1;
+  JmpInsnOpcode JO_B = getJmpInsnType(JmpInsnB - 1, JmpInsnB);
+  if (JO_B == J_UNKNOWN)
+    return false;
+
+  if (!isFallThruRelocation(IS, File, NextIS, Rb))
+    return false;
+
+  // jmpCC jumps to the fall thru block, the branch can be flipped and the
+  // jmp can be deleted.
+  JmpInsnOpcode JInvert = invertJmpOpcode(JO_B);
+  if (JInvert == J_UNKNOWN)
+    return false;
+  IS.addJumpRelocation({JInvert, (Rb.offset - 1), 4});
+  // Move R's values to Rb except the offset.
+ Rb = {R.expr, R.type, Rb.offset, R.addend, R.sym}; + // Cancel R + R.expr = R_NONE; + R.offset = 0; + IS.drop_back(SizeOfDirectJmpInsn); + IS.SpecialFiller = X86_NOP_INSTRUCTIONS; + return true; +} + RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { if (type == R_X86_64_GOTTPOFF) @@ -357,6 +573,90 @@ "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD"); } +void X86_64::relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type, + unsigned Size) const { + switch (Type) { + case J_JMP_32: + if (Size == 4) + *Loc = 0xe9; + else + *Loc = 0xeb; + break; + case J_JE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x84; + } else + *Loc = 0x74; + break; + case J_JNE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x85; + } else + *Loc = 0x75; + break; + case J_JG_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x8f; + } else + *Loc = 0x7f; + break; + case J_JGE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x8d; + } else + *Loc = 0x7d; + break; + case J_JB_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x82; + } else + *Loc = 0x72; + break; + case J_JBE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x86; + } else + *Loc = 0x76; + break; + case J_JL_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x8c; + } else + *Loc = 0x7c; + break; + case J_JLE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x8e; + } else + *Loc = 0x7e; + break; + case J_JA_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x87; + } else + *Loc = 0x77; + break; + case J_JAE_32: + if (Size == 4) { + *(Loc - 1) = 0x0f; + *Loc = 0x83; + } else + *Loc = 0x73; + break; + default: + llvm_unreachable("Unknown Jump Relocation"); + } +} + void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { switch (rel.type) { case R_X86_64_8: Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -113,6 +113,7 @@ 
llvm::StringRef sysroot; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; + llvm::StringRef ltoBBSections; std::pair thinLTOObjectSuffixReplace; std::pair thinLTOPrefixReplace; std::string rpath; @@ -165,6 +166,7 @@ bool ltoCSProfileGenerate; bool ltoDebugPassManager; bool ltoNewPassManager; + bool ltoUniqueBBSectionNames; bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; @@ -175,6 +177,7 @@ bool nostdlib; bool oFormatBinary; bool omagic; + bool optimizeBBJumps; bool optRemarksWithHotness; bool pacPlt; bool picThunk; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -882,6 +882,9 @@ config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false); config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common, !args.hasArg(OPT_relocatable)); + config->optimizeBBJumps = + args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false); + config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); @@ -929,6 +932,10 @@ config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBBSections = args.getLastArgValue(OPT_lto_basicblock_sections); + config->ltoUniqueBBSectionNames = + args.hasFlag(OPT_lto_unique_bb_section_names, + OPT_no_lto_unique_bb_section_names, false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -128,6 +128,26 @@ return 
cast_or_null<ObjFile<ELFT>>(file);
  }

+  unsigned BytesDropped = 0;
+
+  bool Trimmed = false;
+
+  void drop_back(uint64_t num) { BytesDropped += num; }
+
+  void push_back(uint64_t num) {
+    assert(BytesDropped >= num);
+    BytesDropped -= num;
+  }
+
+  void trim() {
+    if (Trimmed)
+      return;
+    if (BytesDropped) {
+      rawData = rawData.drop_back(BytesDropped);
+      Trimmed = true;
+    }
+  }
+
   ArrayRef<uint8_t> data() const {
     if (uncompressedSize >= 0)
       uncompress();
@@ -183,12 +203,26 @@
   // the mmap'ed output buffer.
   template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd);
   void relocateAlloc(uint8_t *buf, uint8_t *bufEnd);
+  static uint64_t getRelocTargetVA(const InputFile *File, RelType Type,
+                                   int64_t A, uint64_t P, const Symbol &Sym,
+                                   RelExpr Expr);

   // The native ELF reloc data type is not very convenient to handle.
   // So we convert ELF reloc records to our own records in Relocations.cpp.
   // This vector contains such "cooked" relocations.
   std::vector<Relocation> relocations;

+  llvm::Optional<std::array<uint8_t, 4>> Filler;
+
+  // Special filler provides variable-length padding instructions.
+  // This has to be ordered by length.
+  llvm::Optional<std::vector<std::vector<uint8_t>>> SpecialFiller;
+
+  // These are artificial jump relocations.
+  std::vector<JumpRelocation> JumpRelocations;
+
+  void addJumpRelocation(JumpRelocation J) { JumpRelocations.push_back(J); }
+
   // A function compiled with -fsplit-stack calling a function
   // compiled without -fsplit-stack needs its prologue adjusted. Find
   // such functions and adjust their prologues.
This is very similar Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -138,7 +138,10 @@ return s->getSize(); if (uncompressedSize >= 0) return uncompressedSize; - return rawData.size(); + if (Trimmed) + return rawData.size(); + else + return rawData.size() - BytesDropped; } void InputSectionBase::uncompress() const { @@ -654,8 +657,9 @@ } } -static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, - uint64_t p, const Symbol &sym, RelExpr expr) { +uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + int64_t a, uint64_t p, + const Symbol &sym, RelExpr expr) { switch (expr) { case R_ABS: case R_DTPREL: @@ -862,6 +866,12 @@ if (expr == R_NONE) continue; + if (expr == R_SIZE) { + target->relocateNoSym(bufLoc, type, + SignExtend64(sym.getSize() + addend)); + continue; + } + if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { std::string msg = getLocation(offset) + ": has non-ABS relocation " + toString(type) + @@ -933,6 +943,8 @@ const unsigned bits = config->wordsize * 8; for (const Relocation &rel : relocations) { + if (rel.expr == R_NONE) + continue; uint64_t offset = rel.offset; if (auto *sec = dyn_cast(this)) offset += sec->outSecOff; @@ -1002,6 +1014,19 @@ break; } } + + // Relocate JumpRelocations. JumpRelocations are created when the opcode of + // a jmp insn must be modified to shrink the jmp insn or to flip the jmp + // insn. This is primarily used to relax and optimize jumps created to use + // basic block sections. 
+  if (auto *Sec = dyn_cast<InputSection>(this)) {
+    for (const JumpRelocation &JumpRel : JumpRelocations) {
+      uint64_t Offset = JumpRel.Offset;
+      Offset += Sec->outSecOff;
+      uint8_t *BufLoc = buf + Offset;
+      target->relocateOneJumpRelocation(BufLoc, JumpRel.Original, JumpRel.Size);
+    }
+  }
 }

 // For each function-defining prologue, find any calls to __morestack,
Index: lld/ELF/LTO.cpp
===================================================================
--- lld/ELF/LTO.cpp
+++ lld/ELF/LTO.cpp
@@ -27,6 +27,7 @@
 #include "llvm/LTO/Config.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Object/SymbolicFile.h"
+#include "llvm/ProfileData/BBSectionsProf.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -76,6 +77,23 @@
   c.Options.FunctionSections = true;
   c.Options.DataSections = true;

+  // Check if basic block sections must be used.
+  if (!config->ltoBBSections.empty()) {
+    if (config->ltoBBSections.equals("all"))
+      c.Options.BBSections = BasicBlockSection::All;
+    else if (config->ltoBBSections.equals("labels"))
+      c.Options.BBSections = BasicBlockSection::Labels;
+    else if (config->ltoBBSections.equals("none"))
+      c.Options.BBSections = BasicBlockSection::None;
+    else {
+      llvm::bbsections::getBBSectionsList(config->ltoBBSections,
+                                          c.Options.BBSectionsList);
+      c.Options.BBSections = BasicBlockSection::List;
+    }
+  }
+
+  c.Options.UniqueBBSectionNames = config->ltoUniqueBBSectionNames;
+
   if (auto relocModel = getRelocModelFromCMModel())
     c.RelocModel = *relocModel;
   else if (config->relocatable)
Index: lld/ELF/Options.td
===================================================================
--- lld/ELF/Options.td
+++ lld/ELF/Options.td
@@ -42,6 +42,10 @@
 defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;

+defm optimize_bb_jumps: B<"optimize-bb-jumps",
+    "Remove direct jumps at the end to the next basic block",
+    "Do not remove any direct jumps at the end to the next basic block">;
+
defm split_stack_adjust_size :
Eq<"split-stack-adjust-size", "Specify adjustment to stack size when a split-stack function calls a " @@ -499,6 +503,11 @@ HelpText<"The format used for serializing remarks (default: YAML)">; defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">; def save_temps: F<"save-temps">; +def lto_basicblock_sections: J<"lto-basicblock-sections=">, + HelpText<"Enable basic block sections for LTO">; +defm lto_unique_bb_section_names: B<"lto-unique-bb-section-names", + "Give unique names to every basic block section for LTO", + "Do not give unique names to every basic block section for LTO">; def thinlto_cache_dir: J<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; Index: lld/ELF/OutputSections.cpp =================================================================== --- lld/ELF/OutputSections.cpp +++ lld/ELF/OutputSections.cpp @@ -243,6 +243,22 @@ sortByOrder(isd->sections, order); } +static void fill(uint8_t *Buf, size_t Size, + const std::vector> &SFiller) { + unsigned I = 0; + unsigned NC = Size / SFiller.back().size(); + for (unsigned C = 0; C < NC; ++C) { + memcpy(Buf + I, SFiller.back().data(), SFiller.back().size()); + I += SFiller.back().size(); + } + unsigned remaining = Size - I; + if (!remaining) + return; + if (SFiller[remaining - 1].size() != remaining) + fatal("failed padding with special filler"); + memcpy(Buf + I, SFiller[remaining - 1].data(), remaining); +} + // Fill [Buf, Buf + Size) with Filler. // This is used for linker script "=fillexp" command. static void fill(uint8_t *buf, size_t size, @@ -331,7 +347,13 @@ end = buf + size; else end = buf + sections[i + 1]->outSecOff; - fill(start, end - start, filler); + // Check if this IS needs a special filler. 
+      if (isec->SpecialFiller)
+        fill(start, end - start, *(isec->SpecialFiller));
+      else if (isec->Filler)
+        fill(start, end - start, *(isec->Filler));
+      else
+        fill(start, end - start, filler);
     }
   });
Index: lld/ELF/Relocations.h
===================================================================
--- lld/ELF/Relocations.h
+++ lld/ELF/Relocations.h
@@ -24,6 +24,7 @@
 // Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
 using RelType = uint32_t;
+using JumpRelType = uint32_t;

 // List of target-independent relocation types. Relocations read
 // from files are converted to these types so that the main code
@@ -107,6 +108,13 @@
   Symbol *sym;
 };

+// Artificial Relocations to manipulate jump instructions.
+struct JumpRelocation {
+  JumpRelType Original;
+  uint64_t Offset;
+  unsigned Size;
+};
+
 // This function writes undefined symbol diagnostics to an internal buffer.
 // Call reportUndefinedSymbols() after calling scanRelocations() to emit
 // the diagnostics.
Index: lld/ELF/Target.h
===================================================================
--- lld/ELF/Target.h
+++ lld/ELF/Target.h
@@ -88,8 +88,21 @@
     relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val);
   }

+  virtual void relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type,
+                                         unsigned Size) const {}
+
   virtual ~TargetInfo();

+  // This deletes a jump insn at the end of the section if it is a fall thru to
+  // the next section. Further, if there is a conditional jump and a direct
+  // jump consecutively, it tries to flip the conditional jump to convert the
+  // direct jump into a fall thru and delete it. Returns true if a jump
+  // instruction can be deleted.
+ virtual bool deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const { + return false; + } + unsigned defaultCommonPageSize = 4096; unsigned defaultMaxPageSize = 4096; Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -25,11 +25,14 @@ #include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ProfileData/BBSectionsProf.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include + +#define DEBUG_TYPE "lld" using namespace llvm; using namespace llvm::ELF; @@ -57,6 +60,7 @@ void sortSections(); void resolveShfLinkOrder(); void finalizeAddressDependentContent(); + void optimizeBasicBlockJumps(); void sortInputSections(); void finalizeSections(); void checkExecuteOnly(); @@ -1608,6 +1612,85 @@ } } +// If Input Sections have been shrinked (basic block sections) then +// update symbol values and sizes associated with these sections. 
+static void fixSymbolsAfterShrinking() {
+  for (InputFile *File : objectFiles) {
+    parallelForEach(File->getSymbols(), [&](Symbol *Sym) {
+      auto *Def = dyn_cast<Defined>(Sym);
+      if (!Def)
+        return;
+
+      const SectionBase *Sec = Def->section;
+      if (!Sec)
+        return;
+
+      const auto *InputSec = dyn_cast<InputSection>(Sec->repl);
+      if (!InputSec || !InputSec->BytesDropped)
+        return;
+
+      const auto NewSize = InputSec->data().size();
+
+      if (Def->value > NewSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Moving symbol " << Sym->getName() << " from "
+                   << Def->value << " to "
+                   << Def->value - InputSec->BytesDropped << " bytes\n");
+        Def->value -= InputSec->BytesDropped;
+        return;
+      }
+
+      if (Def->value + Def->size > NewSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Shrinking symbol " << Sym->getName() << " from "
+                   << Def->size << " to " << Def->size - InputSec->BytesDropped
+                   << " bytes\n");
+        Def->size -= InputSec->BytesDropped;
+      }
+    });
+  }
+}
+
+// If basic block sections exist, there are opportunities to delete fall thru
+// jumps and shrink jump instructions after basic block reordering. This
+// relaxation pass does that.
+template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() {
+  if (!config->optimizeBBJumps || !ELFT::Is64Bits)
+    return;
+
+  script->assignAddresses();
+  // For every output section that has executable input sections, this
+  // deletes all direct jump instructions in input sections that jump to
+  // the following section, as the jump is not required. If there are two
+  // consecutive jump instructions, it checks if they can be flipped so
+  // that one of them becomes a fall through and can be deleted.
+  for (OutputSection *OS : outputSections) {
+    if (!(OS->flags & SHF_EXECINSTR))
+      continue;
+    std::vector<InputSection *> Sections = getInputSections(OS);
+    std::vector<unsigned> Result(Sections.size());
+    // Step 1: Delete all fall through jump instructions. Also, check if two
+    // consecutive jump instructions can be flipped so that a fall through jmp
+    // instruction can be deleted.
+ parallelForEachN(0, Sections.size(), [&](size_t I) { + InputSection *Next = + (I + 1) < Sections.size() ? Sections[I + 1] : nullptr; + InputSection &IS = *Sections[I]; + Result[I] = + target->deleteFallThruJmpInsn(IS, IS.getFile(), Next) ? 1 : 0; + }); + size_t NumDeleted = std::count(Result.begin(), Result.end(), 1); + if (NumDeleted > 0) { + script->assignAddresses(); + LLVM_DEBUG(llvm::dbgs() + << "Removing " << NumDeleted << " fall through jumps\n"); + } + } + + fixSymbolsAfterShrinking(); +} + static void finalizeSynthetic(SyntheticSection *sec) { if (sec && sec->isNeeded() && sec->getParent()) sec->finalizeContents(); @@ -1917,6 +2000,10 @@ finalizeSynthetic(in.symTab); finalizeSynthetic(in.ppc64LongBranchTarget); + // Relaxation to delete inter-basic block jumps created by basic block + // sections. + optimizeBasicBlockJumps(); + // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. Index: lld/test/ELF/bb-sections-delete-fallthru.s =================================================================== --- /dev/null +++ lld/test/ELF/bb-sections-delete-fallthru.s @@ -0,0 +1,31 @@ +# REQUIRES: x86 +## basicblock-sections tests. +## This simple test checks if redundant direct jumps are converted to +## implicit fallthrus. The jne must be converted to je and the direct +## jmp must be deleted. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# RUN: ld.lld -optimize-bb-jumps %t.o -o %t.out +# RUN: llvm-objdump -d %t.out| FileCheck %s --check-prefix=CHECK + +# CHECK: foo: +# CHECK-NEXT: nopl (%rax) +# CHECK-NEXT: {{[0-9|a-f| ]*}} je 3 +# CHECK-NOT: jmp + +# CHECK: a.BB.foo: + +.section .text,"ax",@progbits +# -- Begin function foo +.type foo,@function +foo: + nopl (%rax) + jne a.BB.foo + jmp aa.BB.foo + +.section .text,"ax",@progbits,unique,2 +a.BB.foo: + nopl (%rax) + +aa.BB.foo: + ret