Index: lld/Common/Args.cpp =================================================================== --- lld/Common/Args.cpp +++ lld/Common/Args.cpp @@ -41,6 +41,21 @@ return 0; } +double lld::args::getFloat(opt::InputArgList &args, unsigned key, + double Default) { + auto *a = args.getLastArg(key); + if (!a) + return Default; + + double v; + if (to_float(a->getValue(), v)) + return v; + + StringRef spelling = args.getArgString(a->getIndex()); + error(spelling + ": number expected, but got '" + a->getValue() + "'"); + return 0; +} + std::vector lld::args::getStrings(opt::InputArgList &args, int id) { std::vector v; for (auto *arg : args.filtered(id)) Index: lld/ELF/Arch/X86_64.cpp =================================================================== --- lld/ELF/Arch/X86_64.cpp +++ lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -35,6 +36,8 @@ void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type, + unsigned Size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; @@ -45,9 +48,27 @@ void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const override; + unsigned shrinkJmpInsn(InputSection &IS, InputFile *File, + uint32_t MaxAlignment) const override; + unsigned growJmpInsn(InputSection &IS, InputFile *File, + uint32_t MaxAlignment) const override; }; } // namespace +static std::vector> X86_NOP_INSTRUCTIONS = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} +}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -71,6 +92,481 @@ int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. 
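+// Only the 32-bit (rel32) forms are recognized: a lone 0xe9 is an
+// unconditional direct jmp, and the conditional jumps are the two-byte
+// 0x0f 0x8x opcodes.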
+static JmpInsnOpcode getJmpInsnType(const uint8_t *First, + const uint8_t *Second) { + if (*Second == 0xe9) + return J_JMP_32; + + if (First == nullptr) + return J_UNKNOWN; + + if (*First == 0x0f) { + switch (*Second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +static unsigned getRelocationWithOffset(const InputSection &IS, + uint64_t Offset) { + unsigned I = 0; + for (; I < IS.relocations.size(); ++I) { + if (IS.relocations[I].offset == Offset && + IS.relocations[I].expr != R_NONE) + break; + } + return I; +} + +static unsigned getJumpRelocationWithOffset(const InputSection &IS, + uint64_t Offset) { + unsigned I = 0; + for (; I < IS.JumpRelocations.size(); ++I) { + if (IS.JumpRelocations[I].Offset == Offset) + break; + } + return I; +} + +static bool isRelocationForJmpInsn(Relocation &R) { + return (R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || R.type == R_X86_64_PC8); +} + +static bool isDirectJmpInsnOpcode(const uint8_t *Opcode) { + return (*Opcode == 0xe9); +} + + +// Return true if Relocaction R points to the first instruction in the +// next section. +static bool isFallThruRelocation(InputSection &IS, InputFile *File, + InputSection *NextIS, Relocation &R) { + if (!isRelocationForJmpInsn(R)) + return false; + + uint64_t AddrLoc = (IS.getOutputSection())->addr + IS.outSecOff + R.offset; + uint64_t TargetOffset = SignExtend64( + InputSectionBase::getRelocTargetVA(File, R.type, R.addend, + AddrLoc, *R.sym, R.expr), + (config->wordsize * 8)); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section. + uint64_t NextSectionOffset = NextIS->getOutputSection()->addr + + NextIS->outSecOff; + if ((AddrLoc + 4 + TargetOffset) != NextSectionOffset) + return false; + + return true; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch(opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } + return J_UNKNOWN; +} + +bool X86_64::deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const { + const unsigned SizeOfDirectJmpInsn = 5; + + if (NextIS == nullptr) + return false; + + if (IS.getSize() < SizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned RIndex = getRelocationWithOffset(IS, (IS.getSize() - 4)); + if (RIndex == IS.relocations.size()) + return false; + + Relocation &R = IS.relocations[RIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *SecContents = IS.data().data(); + if (!isDirectJmpInsnOpcode(SecContents + R.offset - 1)) + return false; + + if (isFallThruRelocation(IS, File, NextIS, R)) { + // This is a fall thru and can be deleted. 
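+    // Deleting it means marking the relocation dead (R_NONE), dropping the
+    // 5-byte jmp from the end of the section, and letting the writer pad any
+    // resulting alignment gap with multi-byte NOPs (SpecialFiller).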
+ R.expr = R_NONE; + R.offset = 0; + IS.drop_back(SizeOfDirectJmpInsn); + //IS.Filler = {0x90, 0x90, 0x90, 0x90}; + IS.SpecialFiller = X86_NOP_INSTRUCTIONS; + return true; + } + + // Now, check if flip and delete is possible. + const unsigned SizeOfJmpCCInsn = 6; + // To flip, there must be atleast one JmpCC and one direct jmp. + if (IS.getSize() < (SizeOfDirectJmpInsn + SizeOfJmpCCInsn)) return 0; + + unsigned RbIndex = getRelocationWithOffset(IS, + (IS.getSize() - SizeOfDirectJmpInsn - 4)); + if (RbIndex == IS.relocations.size()) return 0; + + Relocation &Rb = IS.relocations[RbIndex]; + + const uint8_t *JmpInsnB = SecContents + Rb.offset - 1; + JmpInsnOpcode JO_B = getJmpInsnType(JmpInsnB - 1, JmpInsnB); + if (JO_B == J_UNKNOWN) + return false; + + if (!isFallThruRelocation(IS, File, NextIS, Rb)) + return false; + + // jmpCC jumps to the fall thru block, the branch can be flipped and the + // jmp can be deleted. + JmpInsnOpcode JInvert = invertJmpOpcode(JO_B); + if (JInvert == J_UNKNOWN) + return false; + IS.addJumpRelocation({JInvert, (Rb.offset - 1), 4}); + // Move R's values to Rb + Rb.expr = R.expr; + Rb.type = R.type; + Rb.addend = R.addend; + Rb.sym = R.sym; + // Cancel R + R.expr = R_NONE; + R.offset = 0; + IS.drop_back(SizeOfDirectJmpInsn); + //IS.Filler = {0x90, 0x90, 0x90, 0x90}; + IS.SpecialFiller = X86_NOP_INSTRUCTIONS; + return true; +} + +// Returns target offset if the Relocation R corresponds to a jmp instruction +// and the offset of the relocation is 1 byte wide. +static uint64_t getTargetOffsetForJmp(InputSection &IS, InputFile *File, + Relocation &R, JmpInsnOpcode &JmpCode) { + const unsigned SizeOfJmpCCOpcode = 2; + + if (!isRelocationForJmpInsn(R)){ + return false; + } + + unsigned JIndex = getJumpRelocationWithOffset(IS, (R.offset - 1)); + if (JIndex != IS.JumpRelocations.size()){ + JmpCode = static_cast(IS.JumpRelocations[JIndex].Original); + } else { + const uint8_t *SecContents = IS.data().data(); + const uint8_t *JmpInsn = SecContents + R.offset - 1; + const uint8_t *JmpCCInsn = (R.offset >= SizeOfJmpCCOpcode) ? + (JmpInsn - 1) : nullptr; + JmpCode = getJmpInsnType(JmpCCInsn, JmpInsn); + } + if (JmpCode == J_UNKNOWN){ + return 0; + } + + uint64_t AddrLoc = (IS.getOutputSection())->addr + IS.outSecOff + R.offset; + uint64_t TargetOffset = SignExtend64( + InputSectionBase::getRelocTargetVA(File, R.type, R.addend, + AddrLoc, *R.sym, R.expr), + (config->wordsize * 8)); + + return TargetOffset; +} + +static bool isOneByteOffsetWhenShrunk(uint64_t TargetOffset, + JmpInsnOpcode JmpCode, + unsigned BytesShrunk, + unsigned MaxAlign) { + // For negative jumps, the jump target will be closer if shrinking + // is done. + if ((int64_t) TargetOffset < 0){ + TargetOffset += BytesShrunk; + TargetOffset += (JmpCode == J_JMP_32) ? 3 : 4; + } + + if (MaxAlign > 0) { + if ((int64_t) TargetOffset < 0) { + TargetOffset -= (MaxAlign - 1); + } else { + TargetOffset += (MaxAlign - 1); + } + } + + return ((int64_t)TargetOffset == llvm::SignExtend64(TargetOffset, 8)); +} + +static bool isOneByteOffset(uint64_t TargetOffset, unsigned BytesGrown, + unsigned MaxAlign) { + // For negative jumps, the jump target is further. + if ((int64_t) TargetOffset < 0){ + TargetOffset -= BytesGrown; + } + return ((int64_t)TargetOffset == llvm::SignExtend64(TargetOffset, 8)); +} + + + +static void shrinkJmpWithRelocation(InputSection &IS, JmpInsnOpcode JmpCode, + Relocation &R, unsigned &BytesShrunk, + bool DoShrinkJmp = true) { + // Check if there is a Jump Relocation against this offset. 
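+  // If one exists it is updated in place (its offset, and its size when the
+  // jump is actually shrunk); otherwise a new JumpRelocation is recorded so
+  // relocateOneJumpRelocation can rewrite the opcode later.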
+ unsigned JIndex = getJumpRelocationWithOffset(IS, (R.offset - 1)); + + if (DoShrinkJmp && JmpCode!=J_JMP_32) + BytesShrunk += 1; + + // Update R.offset + R.offset -= BytesShrunk; + unsigned NewJmpSize = DoShrinkJmp ? 1 : 4; + + if (JIndex < IS.JumpRelocations.size()) { + JumpRelocation &J = IS.JumpRelocations[JIndex]; + assert((!DoShrinkJmp || J.Size == 4) && "Not the right size of jump."); + J.Offset = R.offset - 1; + if (DoShrinkJmp) + J.Size = NewJmpSize; + } else { + IS.addJumpRelocation({JmpCode, R.offset - 1, NewJmpSize}); + } + + if (DoShrinkJmp) { + // Shrinking Jmp corresponding to relocation R, adjust type and addend. + R.type = R_X86_64_PC8; + assert(R.addend == -4 && "Addend must be -4 to shrink."); + R.addend += 3; + BytesShrunk += 3; + } +} + +unsigned X86_64::shrinkJmpInsn(InputSection &IS, InputFile *File, + unsigned MaxAlign) const { + const unsigned SizeOfDirectShortJmpInsn = 2; + const unsigned SizeOfDirectNearJmpInsn = 5; + const unsigned SizeOfJmpCCInsn = 6; + int SizeOfDirectJmpInsn = SizeOfDirectNearJmpInsn; + + bool IsShortJmp = false; + + if (IS.getSize() < SizeOfDirectNearJmpInsn){ + return 0; + } + + unsigned RIndex = getRelocationWithOffset(IS, (IS.getSize() - 4)); + + if (RIndex == IS.relocations.size()){ + RIndex = getRelocationWithOffset(IS, (IS.getSize() - 1)); + if (RIndex == IS.relocations.size()) { + return 0; + } + + SizeOfDirectJmpInsn = SizeOfDirectShortJmpInsn; + IsShortJmp = true; + } + + Relocation &R = IS.relocations[RIndex]; + + JmpInsnOpcode JmpCode = J_UNKNOWN; + + uint64_t TargetOffset = getTargetOffsetForJmp(IS, File, R, JmpCode); + bool DirectJmp = (JmpCode == J_JMP_32); + + if (JmpCode == J_UNKNOWN) { + return 0; + } + + unsigned BytesShrunk = 0; + + if (!DirectJmp) { + if (!IsShortJmp && isOneByteOffsetWhenShrunk(TargetOffset, JmpCode, BytesShrunk, MaxAlign)) { + shrinkJmpWithRelocation(IS, JmpCode, R, BytesShrunk); + } + } else { + // For Direct Jmps, the previous insn might be a jmpcc that can be + // shrinked. Check that also. + if (IS.getSize() >= (SizeOfDirectJmpInsn + SizeOfJmpCCInsn)) { + unsigned RbIndex = getRelocationWithOffset( + IS, (IS.getSize() - SizeOfDirectJmpInsn - 4)); + + if (RbIndex != IS.relocations.size()) { + Relocation &Rb = IS.relocations[RbIndex]; + JmpInsnOpcode JmpCode_B = J_UNKNOWN; + uint64_t TargetOffset_B = getTargetOffsetForJmp(IS, File, Rb, JmpCode_B); + if (JmpCode_B != J_UNKNOWN && JmpCode_B != J_JMP_32 + && isOneByteOffsetWhenShrunk(TargetOffset_B, JmpCode, BytesShrunk, MaxAlign)) { + shrinkJmpWithRelocation(IS, JmpCode_B, Rb, BytesShrunk); + } + } + } + bool CanShrinkR = !IsShortJmp && isOneByteOffsetWhenShrunk(TargetOffset, JmpCode, BytesShrunk, MaxAlign); + shrinkJmpWithRelocation(IS, JmpCode, R, BytesShrunk, CanShrinkR); + } + + if (BytesShrunk) { + IS.drop_back(BytesShrunk); + } + return BytesShrunk; +} + +static void growJmpWithRelocation(InputSection &IS, JmpInsnOpcode JmpCode, + Relocation &R, unsigned &BytesGrown, + bool DoGrowJmp = true) { + // Check if there is a Jump Relocation against this offset. 
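+  // Unlike shrinking, growing is only applied to jumps that were shrunk
+  // earlier, so a JumpRelocation must already exist; otherwise report an
+  // error.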
+ unsigned JIndex = getJumpRelocationWithOffset(IS, (R.offset - 1)); + + if (JIndex == IS.JumpRelocations.size()){ + error("Jump relocation does not exist!"); + return; + } + + if (DoGrowJmp && JmpCode!=J_JMP_32) + BytesGrown += 1; + + // Update R.offset + R.offset += BytesGrown; + + JumpRelocation &J = IS.JumpRelocations[JIndex]; + assert((!DoGrowJmp || J.Size == 1) && "Not the right size of jump."); + J.Offset = R.offset - 1; + if (DoGrowJmp) { + // Growing Jmp corresponding to relocation R, adjust type and addend. + J.Size = 4; + R.type = R_X86_64_PC32; + assert(R.addend == -1 && "Addend must be -1 to grow."); + R.addend -= 3; + BytesGrown += 3; + } +} + +unsigned X86_64::growJmpInsn(InputSection &IS, InputFile *File, unsigned MaxAlign) const { + const unsigned SizeOfJmpCCInsn = 2; + const unsigned SizeOfDirectNearJmpInsn = 5; + const unsigned SizeOfDirectShortJmpInsn = 2; + int SizeOfDirectJmpInsn = SizeOfDirectShortJmpInsn; + + if (IS.getSize() < SizeOfDirectShortJmpInsn) + return 0; + + bool IsShortJmp = true; + + unsigned RIndex = getRelocationWithOffset(IS, (IS.getSize() - 1)); + + if (RIndex == IS.relocations.size()){ + if (IS.getSize() < SizeOfDirectNearJmpInsn){ + return 0; + } + + RIndex = getRelocationWithOffset(IS, (IS.getSize() - 4)); + if (RIndex == IS.relocations.size()){ + return 0; + } + IsShortJmp = false; + SizeOfDirectJmpInsn = SizeOfDirectNearJmpInsn; + } + + Relocation &R = IS.relocations[RIndex]; + + JmpInsnOpcode JmpCode = J_UNKNOWN; + + uint64_t TargetOffset = getTargetOffsetForJmp(IS, File, R, JmpCode); + bool DirectJmp = (JmpCode == J_JMP_32); + + if (JmpCode == J_UNKNOWN) { + return 0; + } + + unsigned BytesGrown = 0; + if (!DirectJmp) { + // Grow JmpInsn. + if (IsShortJmp && !isOneByteOffset(TargetOffset, BytesGrown, MaxAlign)){ + growJmpWithRelocation(IS, JmpCode, R, BytesGrown); + } + } else { + // For Direct Jmps, the previous insn might be a jmpcc that might need + // to be grown. Check that also. 
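+    // Only the short (2-byte) form of the jmpcc is considered here: its
+    // 1-byte displacement relocation immediately precedes the trailing
+    // direct jmp.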
+ if (IS.getSize() >= (SizeOfDirectJmpInsn + SizeOfJmpCCInsn)) { + unsigned RbIndex = getRelocationWithOffset( + IS, (IS.getSize() - SizeOfDirectJmpInsn - 1)); + + if (RbIndex != IS.relocations.size()) { + Relocation &Rb = IS.relocations[RbIndex]; + JmpInsnOpcode JmpCode_B = J_UNKNOWN; + uint64_t TargetOffset_B = getTargetOffsetForJmp(IS, File, Rb, JmpCode_B); + if (JmpCode_B != J_UNKNOWN && JmpCode_B != J_JMP_32 + && !isOneByteOffset(TargetOffset_B, BytesGrown, MaxAlign)) { + growJmpWithRelocation(IS, JmpCode_B, Rb, BytesGrown); + } + } + } + bool ShouldGrowR = IsShortJmp && !isOneByteOffset(TargetOffset, BytesGrown, MaxAlign); + growJmpWithRelocation(IS, JmpCode, R, BytesGrown, ShouldGrowR); + } + + if (BytesGrown) { + IS.push_back(BytesGrown); + } + return BytesGrown; +} + RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { if (type == R_X86_64_GOTTPOFF) @@ -346,6 +842,47 @@ "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD"); } +void X86_64::relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type, + unsigned Size) const { + switch(Type) { + case J_JMP_32: + if (Size == 4) *Loc = 0xe9; else *Loc = 0xeb; + break; + case J_JE_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x84; } else *Loc = 0x74; + break; + case J_JNE_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x85;} else *Loc = 0x75; + break; + case J_JG_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x8f;} else *Loc = 0x7f; + break; + case J_JGE_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x8d;} else *Loc = 0x7d; + break; + case J_JB_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x82;} else *Loc = 0x72; + break; + case J_JBE_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x86;} else *Loc = 0x76; + break; + case J_JL_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x8c;} else *Loc = 0x7c; + break; + case J_JLE_32: + if (Size == 4) {*(Loc-1) = 0x0f; *Loc = 0x8e;} else *Loc = 0x7e; + break; + case J_JA_32: + if (Size == 4) {*(Loc-1) = 0x0f ; *Loc = 0x87;} else *Loc = 0x77; + break; + case J_JAE_32: + if (Size == 4) {*(Loc-1) = 0x0f ; *Loc = 0x83;} else *Loc = 0x73; + break; + default: + error(getErrorLocation(Loc) + "unrecognized jump reloc " + Twine(Type)); + } +} + void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_X86_64_8: Index: lld/ELF/CMakeLists.txt =================================================================== --- lld/ELF/CMakeLists.txt +++ lld/ELF/CMakeLists.txt @@ -36,6 +36,11 @@ MapFile.cpp MarkLive.cpp OutputSections.cpp + Propeller.cpp + PropellerBBReordering.cpp + PropellerELFCfg.cpp + PropellerFuncOrdering.cpp + PropellerBBReordering.cpp Relocations.cpp ScriptLexer.cpp ScriptParser.cpp Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -105,11 +105,22 @@ llvm::StringRef optRemarksPasses; llvm::StringRef optRemarksFormat; llvm::StringRef progName; + llvm::StringRef propeller; + llvm::StringRef propellerDumpSymbolOrder; + double propellerFallthroughWeight; + double propellerForwardJumpWeight; + double propellerBackwardJumpWeight; + uint64_t propellerForwardJumpDistance; + uint64_t propellerBackwardJumpDistance; + uint64_t propellerChainSplitThreshold; + std::vector propellerDumpCfgs; + std::vector propellerOpts; llvm::StringRef printSymbolOrder; llvm::StringRef soName; llvm::StringRef sysroot; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; + llvm::StringRef ltoBasicBlockSections; std::pair 
thinLTOObjectSuffixReplace; std::pair thinLTOPrefixReplace; std::string rpath; @@ -117,6 +128,7 @@ std::vector auxiliaryList; std::vector filterList; std::vector searchPaths; + llvm::StringMap symbolAlignmentFile; std::vector symbolOrderingFile; std::vector undefined; std::vector dynamicList; @@ -163,6 +175,7 @@ bool ltoCSProfileGenerate; bool ltoDebugPassManager; bool ltoNewPassManager; + bool ltoUniqueBBSectionNames; bool mergeArmExidx; bool mipsN32Abi = false; bool nmagic; @@ -170,17 +183,25 @@ bool nostdlib; bool oFormatBinary; bool omagic; + bool optimizeBBJumps; bool optRemarksWithHotness; bool pacPlt; bool picThunk; bool pie; bool printGcSections; bool printIcfSections; + bool propellerKeepNamedSymbols; + bool propellerPrintStats; + bool propellerAlignBasicBlocks; + bool propellerReorderBlocks; + bool propellerReorderFuncs; + bool propellerSplitFuncs; bool relocatable; bool relrPackDynRelocs; bool saveTemps; bool singleRoRx; bool shared; + bool shrinkJumpsAggressively; bool isStatic = false; bool sysvHash = false; bool target1Rel; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -30,6 +30,7 @@ #include "LinkerScript.h" #include "MarkLive.h" #include "OutputSections.h" +#include "Propeller.h" #include "ScriptParser.h" #include "SymbolTable.h" #include "Symbols.h" @@ -69,6 +70,9 @@ using namespace lld; using namespace lld::elf; +using std::chrono::duration; +using std::chrono::system_clock; + Configuration *elf::config; LinkerDriver *elf::driver; @@ -800,6 +804,24 @@ return names.takeVector(); } +// Parse the symbol alignment file and warn for any duplicate entries. +static StringMap getSymbolAlignmentFile(MemoryBufferRef mb) { + StringMap alignments; + for (StringRef s : args::getLines(mb)) { + auto entry = s.split(' '); + unsigned align = 0; + if (!to_integer(entry.second, align)) { + warn(mb.getBufferIdentifier() + ": invalid alignment (" + entry.second + + ") for symbol: " + entry.first); + continue; + } + if (!alignments.insert(std::make_pair(entry.first, align)).second) + warn(mb.getBufferIdentifier() + + ": duplicate alignment for symbol: " + entry.first); + } + return alignments; +} + static void parseClangOption(StringRef opt, const Twine &msg) { std::string err; raw_string_ostream os(err); @@ -837,6 +859,10 @@ config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false); config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common, !args.hasArg(OPT_relocatable)); + config->optimizeBBJumps = + args.hasFlag(OPT_optimize_bb_jumps, + OPT_no_optimize_bb_jumps, true); + config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); @@ -881,6 +907,12 @@ config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBasicBlockSections = + args.getLastArgValue(OPT_lto_basicblock_sections); + config->ltoUniqueBBSectionNames = + args.hasFlag(OPT_lto_unique_bb_section_names, + OPT_no_lto_unique_bb_section_names, + false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = @@ -905,12 +937,86 @@ args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); 
config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); + + config->propeller = args.getLastArgValue(OPT_propeller); + + config->propellerKeepNamedSymbols = + args.hasFlag(OPT_propeller_keep_named_symbols, + OPT_no_propeller_keep_named_symbols, false); + + config->propellerDumpSymbolOrder = + args.getLastArgValue(OPT_propeller_dump_symbol_order); + + config->propellerPrintStats = + args.hasFlag(OPT_propeller_print_stats, + OPT_no_propeller_print_stats, false); + + config->propellerDumpCfgs = args.getAllArgValues(OPT_propeller_dump_cfg); + + config->propellerAlignBasicBlocks = + args.hasFlag(OPT_propeller_align_basicblocks, + OPT_no_propeller_align_basicblocks, false); + + config->propellerReorderBlocks = config->propellerReorderFuncs = + config->propellerSplitFuncs = !config->propeller.empty(); + + config->propellerFallthroughWeight = + args::getFloat(args, OPT_propeller_fallthrough_weight, 1.0); + config->propellerForwardJumpWeight = + args::getFloat(args, OPT_propeller_forward_jump_weight, 0.1); + config->propellerBackwardJumpWeight = + args::getFloat(args, OPT_propeller_backward_jump_weight, 0.1); + + config->propellerForwardJumpDistance = + args::getFloat(args, OPT_propeller_forward_jump_distance, 1024); + config->propellerBackwardJumpDistance = + args::getFloat(args, OPT_propeller_backward_jump_distance, 640); + config->propellerChainSplitThreshold = + args::getFloat(args, OPT_propeller_chain_split_threshold, 128); + + // Parse Propeller flags. + auto propellerOpts = args.getAllArgValues(OPT_propeller_opt); + bool splitFuncsExplicit = false; + for(auto& propellerOpt: propellerOpts){ + StringRef S = StringRef(propellerOpt); + if (S == "reorder-funcs"){ + config->propellerReorderFuncs = true; + } else if (S == "no-reorder-funcs") { + config->propellerReorderFuncs = false; + } else if (S == "reorder-blocks") { + config->propellerReorderBlocks = true; + } else if (S == "no-reorder-blocks") { + config->propellerReorderBlocks = false; + } else if (S == "split-funcs") { + config->propellerSplitFuncs = true; + splitFuncsExplicit = true; + } else if (S == "no-split-funcs") { + config->propellerSplitFuncs = false; + } else + error("unknown propeller option: " + S); + } + + if (!config->propeller.empty() && !config->propellerReorderBlocks) { + if (splitFuncsExplicit){ + error("propeller: Inconsistent combination of propeller optimizations" + " 'split-funcs' and 'no-reorder-blocks'."); + } else { + warn("propeller: no-reorder-blocks implicitly sets no-split-funcs."); + config->propellerSplitFuncs = false; + } + } + config->rpath = getRpath(args); config->relocatable = args.hasArg(OPT_relocatable); config->saveTemps = args.hasArg(OPT_save_temps); config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); + + config->shrinkJumpsAggressively = + args.hasFlag(OPT_shrink_jumps_aggressively, + OPT_no_shrink_jumps_aggressively, true); + config->singleRoRx = args.hasArg(OPT_no_rosegment); config->soName = args.getLastArgValue(OPT_soname); config->sortSection = getSortSection(args); @@ -1040,6 +1146,15 @@ } } + if (auto *arg = args.getLastArg(OPT_symbol_alignment_file)){ + if (Optional buffer = readFile(arg->getValue())){ + config->symbolAlignmentFile = getSymbolAlignmentFile(*buffer); + } else { + error(StringRef("Failed to read symbol alignment file: ") + + arg->getValue()); + } + } + assert(config->versionDefinitions.empty()); config->versionDefinitions.push_back({"local", 
(uint16_t)VER_NDX_LOCAL, {}}); config->versionDefinitions.push_back( @@ -1858,6 +1973,42 @@ for (InputSectionBase *s : f->getSections()) inputSections.push_back(cast(s)); + if (!config->propeller.empty()) { + lld::propeller::Propeller P(symtab); + if (P.checkPropellerTarget()) { + if (P.processFiles(objectFiles)) { + config->symbolOrderingFile = P.genSymbolOrderingFile(); + } else { + error("Propeller stage failed."); + } + } else { + warn("[Propeller]: Propeller skipped '" + config->outputFile + "'."); + } + } + + if (!config->symbolAlignmentFile.empty()) { + auto alignSym = [](Symbol *sym) { + auto it = config->symbolAlignmentFile.find(sym->getName()); + if (it == config->symbolAlignmentFile.end()) + return; + if (auto *d = dyn_cast(sym)) { + if (auto *sec = dyn_cast_or_null(d->section)) { + sec->alignment = it->second; + } + } + }; + + for (InputFile *file : objectFiles) + for (Symbol *sym : file->getSymbols()) + if (sym->isLocal()) + alignSym(sym); + + symtab->forEachSymbol([&alignSym](Symbol *sym) { + if (!sym->isLazy()) + alignSym(sym); + }); + } + llvm::erase_if(inputSections, [](InputSectionBase *s) { if (s->type == SHT_LLVM_SYMPART) { readSymbolPartitionSection(s); Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -128,6 +128,28 @@ return cast_or_null>(file); } + unsigned BytesDropped = 0; + + bool Trimmed = false; + + void drop_back(uint64_t num) { + BytesDropped += num; + } + + void push_back(uint64_t num) { + assert(BytesDropped >= num); + BytesDropped -= num; + } + + void trim() { + if (Trimmed) + return; + if (BytesDropped){ + rawData = rawData.drop_back(BytesDropped); + Trimmed = true; + } + } + ArrayRef data() const { if (uncompressedSize >= 0) uncompress(); @@ -179,12 +201,28 @@ // the mmap'ed output buffer. template void relocate(uint8_t *buf, uint8_t *bufEnd); void relocateAlloc(uint8_t *buf, uint8_t *bufEnd); + static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, + int64_t A, uint64_t P, const Symbol &Sym, + RelExpr Expr); // The native ELF reloc data type is not very convenient to handle. // So we convert ELF reloc records to our own records in Relocations.cpp. // This vector contains such "cooked" relocations. std::vector relocations; + llvm::Optional> Filler; + + // Special filler provides variable-length padding instructions. + // This has to be ordered by length. + llvm::Optional>> SpecialFiller; + + // These are artificial jump relocations. + std::vector JumpRelocations; + + void addJumpRelocation(JumpRelocation J) { + JumpRelocations.push_back(J); + } + // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find // such functions and adjust their prologues. 
This is very similar Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -139,7 +139,10 @@ return s->getSize(); if (uncompressedSize >= 0) return uncompressedSize; - return rawData.size(); + if (Trimmed) + return rawData.size(); + else + return rawData.size() - BytesDropped; } void InputSectionBase::uncompress() const { @@ -646,7 +649,7 @@ } } -static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, +uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, int64_t a, uint64_t p, const Symbol &sym, RelExpr expr) { switch (expr) { case R_ABS: @@ -854,6 +857,12 @@ if (expr == R_NONE) continue; + if (expr == R_SIZE) { + target->relocateOne(bufLoc, type, + SignExtend64(sym.getSize() + addend)); + continue; + } + if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { std::string msg = getLocation(offset) + ": has non-ABS relocation " + toString(type) + @@ -924,6 +933,8 @@ const unsigned bits = config->wordsize * 8; for (const Relocation &rel : relocations) { + if (rel.expr == R_NONE) + continue; uint64_t offset = rel.offset; if (auto *sec = dyn_cast(this)) offset += sec->outSecOff; @@ -985,6 +996,19 @@ break; } } + + // Relocate JumpRelocations. JumpRelocations are created when the opcode of + // a jmp insn must be modified to shrink the jmp insn or to flip the jmp + // insn. This is primarily used to relax and optimize jumps created to use + // basic block sections. + if (auto *Sec = dyn_cast(this)) { + for (const JumpRelocation &JumpRel : JumpRelocations) { + uint64_t Offset = JumpRel.Offset; + Offset += Sec->outSecOff; + uint8_t *BufLoc = buf + Offset; + target->relocateOneJumpRelocation(BufLoc, JumpRel.Original, JumpRel.Size); + } + } } // For each function-defining prologue, find any calls to __morestack, Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -58,6 +58,20 @@ return ret; } +static void getBasicBlockSectionsList(MemoryBufferRef MBRef, + TargetOptions &Options) { + SmallVector Arr; + MBRef.getBuffer().split(Arr, '\n'); + for (StringRef S : Arr) { + // Function names follow a '!' character. + // Empty '!' implies no more functions. + if (S.size() == 1 && S[0] == '!') + break; + if (S.size() > 1 && S[0] == '!') + Options.BasicBlockSectionsList[S.str().substr(1)] = true; + } +} + static std::string getThinLTOOutputFile(StringRef modulePath) { return lto::getThinLTOOutputFile(modulePath, config->thinLTOPrefixReplace.first, @@ -76,6 +90,23 @@ c.Options.FunctionSections = true; c.Options.DataSections = true; + // Check if basic block sections must be used. 
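+  // "all", "labels", and "none" select the corresponding BasicBlockSection
+  // mode directly; any other value is treated as the path of a file listing
+  // functions ("!name" per line, a lone "!" terminates the list) and enables
+  // the List mode.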
+ if (!config->ltoBasicBlockSections.empty()) { + if (config->ltoBasicBlockSections.equals("all")) + c.Options.BasicBlockSections = BasicBlockSection::All; + else if (config->ltoBasicBlockSections.equals("labels")) + c.Options.BasicBlockSections = BasicBlockSection::Labels; + else if (config->ltoBasicBlockSections.equals("none")) + c.Options.BasicBlockSections = BasicBlockSection::None; + else if (Optional Buffer = + readFile(config->ltoBasicBlockSections)) { + getBasicBlockSectionsList(*Buffer, c.Options); + c.Options.BasicBlockSections = BasicBlockSection::List; + } + } + + c.Options.UniqueBBSectionNames = config->ltoUniqueBBSectionNames; + if (auto relocModel = getRelocModelFromCMModel()) c.RelocModel = *relocModel; else if (config->relocatable) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -42,6 +42,10 @@ defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"=">; +defm optimize_bb_jumps: B<"optimize-bb-jumps", + "Remove direct jumps at the end to the next basic block", + "Do not remove any direct jumps at the end to the next basic block">; + defm split_stack_adjust_size : Eq<"split-stack-adjust-size", "Specify adjustment to stack size when a split-stack function calls a " @@ -300,6 +304,49 @@ defm print_symbol_order: Eq<"print-symbol-order", "Print a symbol order specified by --call-graph-ordering-file into the speficied file">; +defm propeller: Eq<"propeller", "Propeller profile">; + +defm propeller_opt: Eq<"propeller-opt", + "Propeller optimization flags: reorder-blocks, reorder-funcs, and split-funcs is on by the default when -propeller is specified.">, + MetaVarName<"[reorder-blocks, no-reorder-blocks, reorder-funcs, no-reorder-funcs, split-funcs, no-split-funcs]">; + +defm propeller_keep_named_symbols: B<"propeller-keep-named-symbols", + "Do not delete basic block section symbols", + "Delete unused basic block section symbols (default)">; + +defm propeller_print_stats: B<"propeller-print-stats", + "Print propeller statistics.", + "Do not print propeller statistics (default)">; + +defm propeller_dump_symbol_order: Eq<"propeller-dump-symbol-order", + "Dump the propeller-generated symbol ordering into the file.">, + MetaVarName<"">; + +defm propeller_dump_cfg: Eq<"propeller-dump-cfg", + "Dump the cfg of the function in DOT format (in a file named the same as the function).">; + +defm propeller_align_basicblocks: B<"propeller-align-basicblocks", + "Align basic blocks.", + "Do not align basic blocks (default)">; + +defm propeller_fallthrough_weight: Eq<"propeller-fallthrough-weight", + "Fallthrough weight parameter to use in ExtTSP algorithm (default: 1.0).">; + +defm propeller_forward_jump_weight: Eq<"propeller-forward-jump-weight", + "(Near) forward jump weight parameter to use in ExtTSP algorithm (default: 0.1).">; + +defm propeller_backward_jump_weight: Eq<"propeller-backward-jump-weight", + "(Near) backward jump weight parameter to use in ExtTSP algorithm (default: 0.1).">; + +defm propeller_forward_jump_distance: Eq<"propeller-forward-jump-distance", + "Maximum distance for a forward jump in ExtTSP algorithm (default: 1024).">; + +defm propeller_backward_jump_distance: Eq<"propeller-backward-jump-distance", + "Maximum distance for a backward jump in ExtTSP algorithm (default: 640).">; + +defm propeller_chain_split_threshold: Eq<"propeller-chain-split-threshold", + "Maximum binary size of a chain which could be split in ExtTSP algorithm (default: 128).">; + def pop_state: 
F<"pop-state">, HelpText<"Undo the effect of -push-state">; @@ -326,6 +373,10 @@ def shared: F<"shared">, HelpText<"Build a shared object">; +defm shrink_jumps_aggressively: B<"shrink-jumps-aggressively", + "Shrink Jumps aggressively and aligment-agnostic (deafult)", + "Do not shrink jumps aggressively">; + defm soname: Eq<"soname", "Set DT_SONAME">; defm sort_section: @@ -341,6 +392,9 @@ def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +defm symbol_alignment_file: + Eq<"symbol-alignment-file", "Overwrite alignment for symbols.">; + defm symbol_ordering_file: Eq<"symbol-ordering-file", "Layout sections to place symbols in the order specified by symbol ordering file">; @@ -496,6 +550,11 @@ HelpText<"The format used for serializing remarks (default: YAML)">; defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">; def save_temps: F<"save-temps">; +def lto_basicblock_sections: J<"lto-basicblock-sections=">, + HelpText<"Enable basic block sections for LTO">; +defm lto_unique_bb_section_names: B<"lto-unique-bb-section-names", + "Give unique names to every basic block section for LTO", + "Do not give unique names to every basic block section for LTO">; def thinlto_cache_dir: J<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; Index: lld/ELF/OutputSections.cpp =================================================================== --- lld/ELF/OutputSections.cpp +++ lld/ELF/OutputSections.cpp @@ -243,6 +243,22 @@ sortByOrder(isd->sections, order); } +static void fill(uint8_t *Buf, size_t Size, + const std::vector> &SFiller) { + unsigned I = 0; + unsigned NC = Size / SFiller.back().size(); + for (unsigned C = 0 ; C < NC; ++C){ + memcpy(Buf + I, SFiller.back().data(), SFiller.back().size()); + I += SFiller.back().size(); + } + unsigned remaining = Size - I; + if (!remaining) + return; + if (SFiller.at(remaining-1).size() != remaining) + fatal("Failed padding with special filler."); + memcpy(Buf + I, SFiller.at(remaining-1).data(), remaining); +} + // Fill [Buf, Buf + Size) with Filler. // This is used for linker script "=fillexp" command. static void fill(uint8_t *buf, size_t size, @@ -326,7 +342,13 @@ end = buf + size; else end = buf + sections[i + 1]->outSecOff; - fill(start, end - start, filler); + // Check if this IS needs a special filler. + if (isec->SpecialFiller) + fill(start, end - start, *(isec->SpecialFiller)); + else if (isec->Filler) + fill(start, end - start, *(isec->Filler)); + else + fill(start, end - start, filler); } }); Index: lld/ELF/Relocations.h =================================================================== --- lld/ELF/Relocations.h +++ lld/ELF/Relocations.h @@ -24,6 +24,7 @@ // Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL. using RelType = uint32_t; +using JumpRelType = uint32_t; // List of target-independent relocation types. Relocations read // from files are converted to these types so that the main code @@ -108,6 +109,13 @@ Symbol *sym; }; +// Artificial Relocations to manipulate jump instructions. +struct JumpRelocation { + JumpRelType Original; + uint64_t Offset; + unsigned Size; +}; + // This function writes undefined symbol diagnostics to an internal buffer. // Call reportUndefinedSymbols() after calling scanRelocations() to emit // the diagnostics. 
Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -27,6 +27,7 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Endian.h" #include +#include namespace lld { namespace elf { @@ -585,6 +586,7 @@ unsigned getNumSymbols() const { return symbols.size() + 1; } size_t getSymbolIndex(Symbol *sym); ArrayRef getSymbols() const { return symbols; } + std::map> EndsMap; protected: void sortSymTabSymbols(); Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -24,6 +24,7 @@ #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/PropellerCommon.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "lld/Common/Version.h" @@ -2057,9 +2058,29 @@ void SymbolTableBaseSection::addSymbol(Symbol *b) { // Adding a local symbol to a .dynsym is a bug. assert(this->type != SHT_DYNSYM || !b->isLocal()); + assert(this->type != SHT_DYNSYM || !b->isLocal()); + StringRef SName = b->getName(); + uint64_t EndKey = (uint64_t)(SName.data() + SName.size()); + auto I = EndsMap.find(EndKey); + if (I != EndsMap.end()) { + uint64_t offset = I->second.first; + uint32_t size = I->second.second; + int64_t diff = size - SName.size(); + if (diff >= 0) { + uint64_t new_offset = offset + diff; + symbols.push_back({b, new_offset}); + return; + } + } + bool hashIt = b->isLocal(); - symbols.push_back({b, strTabSec.addString(b->getName(), hashIt)}); + uint32_t offset = strTabSec.addString(b->getName(), hashIt); + symbols.push_back({b, offset}); + if (lld::propeller::SymbolEntry::isBBSymbol(b->getName())) { + EndsMap.emplace(std::piecewise_construct, std::forward_as_tuple(EndKey), + std::forward_as_tuple(offset, SName.size())); + } } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { Index: lld/ELF/Target.h =================================================================== --- lld/ELF/Target.h +++ lld/ELF/Target.h @@ -79,8 +79,24 @@ virtual void relocateOne(uint8_t *loc, RelType type, uint64_t val) const = 0; + virtual void relocateOneJumpRelocation(uint8_t *Loc, JumpRelType Type, + JumpRelType Val) const { } + virtual ~TargetInfo(); + virtual bool deleteFallThruJmpInsn(InputSection &IS, InputFile *File, + InputSection *NextIS) const { + return false; + } + + virtual unsigned shrinkJmpInsn(InputSection &IS, InputFile *File, + uint32_t MaxAlignment) const + { return 0; } + + virtual unsigned growJmpInsn(InputSection &IS, InputFile *File, + uint32_t MaxAlignment) const + { return 0; } + unsigned defaultCommonPageSize = 4096; unsigned defaultMaxPageSize = 4096; Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -14,6 +14,7 @@ #include "LinkerScript.h" #include "MapFile.h" #include "OutputSections.h" +#include "Propeller.h" #include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" @@ -21,6 +22,7 @@ #include "Target.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" +#include "lld/Common/PropellerCommon.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" @@ -28,7 +30,12 @@ #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/xxhash.h" +#include +#include #include +#include + +#define 
DEBUG_TYPE "lld" using namespace llvm; using namespace llvm::ELF; @@ -39,6 +46,9 @@ using namespace lld; using namespace lld::elf; +using std::chrono::system_clock; +using std::chrono::duration; + namespace { // The writer writes a SymbolTable result to a file. template class Writer { @@ -57,6 +67,7 @@ void sortSections(); void resolveShfLinkOrder(); void finalizeAddressDependentContent(); + void optimizeBasicBlockJumps(); void sortInputSections(); void finalizeSections(); void checkExecuteOnly(); @@ -545,7 +556,11 @@ // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. // finalizeSections does that. + //auto startFinalizeSectionTime = system_clock::now(); finalizeSections(); + //auto endFinalizeSectionTime = system_clock::now(); + //duration FinalizeSectionTime = endFinalizeSectionTime - startFinalizeSectionTime; + //warn("[TIME](s) finalize section (includes section ordering): " + Twine(std::to_string(FinalizeSectionTime.count()))); checkExecuteOnly(); if (errorCount()) return; @@ -625,12 +640,27 @@ if (config->emitRelocs) return true; + StringRef name = sym.getName(); + + if (name.empty() && sym.type == llvm::ELF::STT_NOTYPE && + sym.binding == llvm::ELF::STB_LOCAL) { + return false; + } + + if (!config->propeller.empty() && + lld::propeller::SymbolEntry::isBBSymbol(name)) { + if (config->propellerKeepNamedSymbols || + propeller::PropLeg.shouldKeepBBSymbol(name)) + return true; + else + return false; + } + // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. // * The symbol is in a SHF_MERGE section, which is normally the reason for // the assembler keeping the .L symbol. - StringRef name = sym.getName(); bool isLocal = name.startswith(".L") || name.empty(); if (!isLocal) return true; @@ -672,6 +702,8 @@ return; for (InputFile *file : objectFiles) { ObjFile *f = cast>(file); + std::list localNonBBSymbols; + std::list localBBSymbols; for (Symbol *b : f->getLocalSymbols()) { if (!b->isLocal()) fatal(toString(f) + @@ -685,7 +717,24 @@ continue; if (!shouldKeepInSymtab(*dr)) continue; - in.symTab->addSymbol(b); + + if (lld::propeller::SymbolEntry::isBBSymbol(b->getName())) + localBBSymbols.emplace_back(b); + else + localNonBBSymbols.emplace_back(b); + } + + localBBSymbols.sort([](Symbol *A, Symbol *B) { + return A->getName().size() > B->getName().size(); + }); + + // Add BB symbols to SymTab first. + for (auto *S : localBBSymbols) { + in.symTab->addSymbol(S); + } + + for (auto *S : localNonBBSymbols) { + in.symTab->addSymbol(S); } } } @@ -1592,6 +1641,137 @@ } } +// If Input Sections have been shrinked (basic block sections) then +// update symbol values and sizes associated with these sections. 
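+// A symbol whose value lies beyond the shrunk section is moved back by the
+// number of dropped bytes; a symbol that merely extends past the new end has
+// its size reduced instead.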
+static void fixSymbolsAfterShrinking() { + for (InputFile *File : objectFiles) { + parallelForEach(File->getSymbols(), [&](Symbol *Sym) { + auto *Def = dyn_cast(Sym); + if (!Def) + return; + + const auto *Sec = Def->section; + if (!Sec) + return; + + const auto *InputSec = dyn_cast(Sec->repl); + if (!InputSec || !InputSec->BytesDropped) + return; + + const auto NewSize = InputSec->data().size(); + + if (Def->value > NewSize) { + LLVM_DEBUG(llvm::dbgs() << "Moving symbol " << Sym->getName() << + " from " << Def->value << " to " << + Def->value - InputSec->BytesDropped << " bytes\n"); + Def->value -= InputSec->BytesDropped; + return; + } + + if (Def->value + Def->size > NewSize) { + LLVM_DEBUG(llvm::dbgs() << "Shrinking symbol " << Sym->getName() << + " from " << Def->size << " to " << + Def->size - InputSec->BytesDropped << " bytes\n"); + Def->size -= InputSec->BytesDropped; + } + }); + } +} + + +// If basic block sections exist, there are opportunities to delete fall thru +// jumps and shrink jump instructions after basic block reordering. This +// relaxation pass does that. +template void Writer::optimizeBasicBlockJumps() { + if (!config->optimizeBBJumps || !ELFT::Is64Bits) + return; + + script->assignAddresses(); + for (OutputSection *OS : outputSections) { + if (!(OS->flags & SHF_EXECINSTR)) continue; + std::vector Sections = getInputSections(OS); + std::vector Result(Sections.size()); + // Delete all fall through jump instructions. + parallelForEachN(0, Sections.size(), [&](size_t I) { + InputSection *Next = (I + 1) < Sections.size() ? + Sections[I + 1] : nullptr; + InputSection &IS = *Sections[I]; + Result[I] = target->deleteFallThruJmpInsn(IS, IS.getFile(), Next); + }); + size_t NumDeleted = std::count(Result.begin(), Result.end(), true); + if (NumDeleted > 0) { + script->assignAddresses(); + LLVM_DEBUG(llvm::dbgs() << "Removing " << NumDeleted << + " fall through jumps\n"); + } + + auto MaxIt = config->shrinkJumpsAggressively ? Sections.end() : + std::max_element(Sections.begin(), Sections.end(), + [](InputSection * const s1, InputSection * const s2) { + return s1->alignment < s2->alignment; + }); + uint32_t MaxAlign = (MaxIt != Sections.end()) ? 
(*MaxIt)->alignment : 0; + + // Shrink jump Instructions optimistically + std::vector Shrunk(Sections.size(), 0); + std::vector Changed(Sections.size(), 0); + bool AnyChanged = false; + do { + AnyChanged = false; + parallelForEachN(0, Sections.size(), [&](size_t I) { + InputSection &IS = *Sections[I]; + unsigned BytesShrunk = target->shrinkJmpInsn(IS, IS.getFile(), MaxAlign); + Changed[I] = (BytesShrunk > 0); + Shrunk[I] += BytesShrunk; + }); + AnyChanged = std::any_of(Changed.begin(), Changed.end(), + [] (bool e) {return e;}); + size_t Num = std::count_if(Shrunk.begin(), Shrunk.end(), + [] (int e) { return e > 0; }); + Num += std::count_if(Shrunk.begin(), Shrunk.end(), + [] (int e) { return e > 4; }); + if (Num > 0) + LLVM_DEBUG(llvm::dbgs() << "Output Section :" << OS->name << + " : Shrinking " << Num << " jmp instructions\n"); + if (AnyChanged) + script->assignAddresses(); + } while (AnyChanged); + + if (config->shrinkJumpsAggressively) { + // Grow jump instructions when necessary + std::vector Grown(Sections.size(), 0); + do { + AnyChanged = false; + parallelForEachN(0, Sections.size(), [&](size_t I) { + InputSection &IS = *Sections[I]; + unsigned BytesGrown = target->growJmpInsn(IS, IS.getFile(), MaxAlign); + Changed[I] = (BytesGrown > 0); + Grown[I] += BytesGrown; + }); + size_t Num = std::count_if(Grown.begin(), Grown.end(), + [] (int e) { return e > 0; }); + Num += std::count_if(Grown.begin(), Grown.end(), + [] (int e) { return e > 4; }); + if (Num > 0) + LLVM_DEBUG(llvm::dbgs() << "Output Section :" << OS->name << + " : Growing " << Num << " jmp instructions\n"); + AnyChanged = std::any_of(Changed.begin(), Changed.end(), + [] (bool e) {return e;}); + if (AnyChanged) + script->assignAddresses(); + } while (AnyChanged); + } + } + + for (OutputSection *OS : outputSections) { + std::vector Sections = getInputSections(OS); + for (InputSection * IS: Sections) + IS->trim(); + } + + fixSymbolsAfterShrinking(); +} + static void finalizeSynthetic(SyntheticSection *sec) { if (sec && sec->isNeeded() && sec->getParent()) sec->finalizeContents(); @@ -1933,6 +2113,15 @@ finalizeSynthetic(in.symTab); finalizeSynthetic(in.ppc64LongBranchTarget); + // Relaxation to delete inter-basic block jumps created by basic block + // sections. + //auto startOptBBJumpTime = system_clock::now(); + optimizeBasicBlockJumps(); + //auto endOptBBJumpTime = system_clock::now(); + //duration OptBBJumpTime = endOptBBJumpTime - startOptBBJumpTime; + //warn("[TIME](s) optimize bb jumps: " + Twine(std::to_string(OptBBJumpTime.count()))); + + // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. Index: lld/include/lld/Common/Args.h =================================================================== --- lld/include/lld/Common/Args.h +++ lld/include/lld/Common/Args.h @@ -28,6 +28,9 @@ int64_t getInteger(llvm::opt::InputArgList &args, unsigned key, int64_t Default); +double getFloat(llvm::opt::InputArgList &args, unsigned key, + double Default); + std::vector getStrings(llvm::opt::InputArgList &args, int id); uint64_t getZOptionValue(llvm::opt::InputArgList &args, int id, StringRef key, Index: lld/test/ELF/lto/linker-script-symbols-ipo.ll =================================================================== --- lld/test/ELF/lto/linker-script-symbols-ipo.ll +++ lld/test/ELF/lto/linker-script-symbols-ipo.ll @@ -4,7 +4,7 @@ ; RUN: echo "bar = foo;" > %t.script ;; Check that without linkerscript bar is inlined. 
-; RUN: ld.lld %t1.o %t2.o -o %t3 -save-temps +; RUN: ld.lld %t1.o %t2.o -o %t3 -save-temps -no-optimize-bb-jumps ; RUN: llvm-objdump -d %t3 | FileCheck %s --check-prefix=IPO ; IPO: Disassembly of section .text: ; IPO: _start: @@ -12,7 +12,7 @@ ; IPO-NEXT: retq ;; Check that LTO does not do IPO for symbols assigned by script. -; RUN: ld.lld %t1.o %t2.o -o %t4 --script %t.script -save-temps +; RUN: ld.lld %t1.o %t2.o -o %t4 --script %t.script -save-temps -no-optimize-bb-jumps ; RUN: llvm-objdump -d %t4 | FileCheck %s --check-prefix=NOIPO ; NOIPO: Disassembly of section .text: ; NOIPO: foo: Index: lld/test/ELF/lto/wrap-2.ll =================================================================== --- lld/test/ELF/lto/wrap-2.ll +++ lld/test/ELF/lto/wrap-2.ll @@ -2,14 +2,14 @@ ; LTO ; RUN: llvm-as %s -o %t.o ; RUN: llvm-as %S/Inputs/wrap-bar.ll -o %t1.o -; RUN: ld.lld %t.o %t1.o -shared -o %t.so -wrap=bar +; RUN: ld.lld %t.o %t1.o -shared -o %t.so -wrap=bar -no-optimize-bb-jumps ; RUN: llvm-objdump -d %t.so | FileCheck %s ; RUN: llvm-readobj --symbols %t.so | FileCheck -check-prefix=BIND %s ; ThinLTO ; RUN: opt -module-summary %s -o %t.o ; RUN: opt -module-summary %S/Inputs/wrap-bar.ll -o %t1.o -; RUN: ld.lld %t.o %t1.o -shared -o %t.so -wrap=bar +; RUN: ld.lld %t.o %t1.o -shared -o %t.so -wrap=bar -no-optimize-bb-jumps ; RUN: llvm-objdump -d %t.so | FileCheck %s ; RUN: llvm-readobj --symbols %t.so | FileCheck -check-prefix=BIND %s Index: lld/test/ELF/wrap-plt.s =================================================================== --- lld/test/ELF/wrap-plt.s +++ lld/test/ELF/wrap-plt.s @@ -4,7 +4,7 @@ // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t -// RUN: ld.lld -o %t2 %t -wrap foo -shared +// RUN: ld.lld -o %t2 %t -wrap foo -shared -no-optimize-bb-jumps // RUN: llvm-readobj -S -r %t2 | FileCheck %s // RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=DISASM %s Index: lld/test/ELF/x86-64-plt.s =================================================================== --- lld/test/ELF/x86-64-plt.s +++ lld/test/ELF/x86-64-plt.s @@ -1,9 +1,9 @@ // REQUIRES: x86 // RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/shared.s -o %t2.o -// RUN: ld.lld -shared -soname=so %t2.o -o %t2.so -// RUN: ld.lld -shared %t.o %t2.so -o %t -// RUN: ld.lld %t.o %t2.so -o %t3 +// RUN: ld.lld -shared -soname=so %t2.o -o %t2.so -no-optimize-bb-jumps +// RUN: ld.lld -shared %t.o %t2.so -o %t -no-optimize-bb-jumps +// RUN: ld.lld %t.o %t2.so -o %t3 -no-optimize-bb-jumps // RUN: llvm-readobj -S -r %t | FileCheck %s // RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=DISASM %s // RUN: llvm-readobj -S -r %t3 | FileCheck --check-prefix=CHECK2 %s
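For reference, the variable-length padding applied when sections are shrunk (the SpecialFiller overload of fill() in the OutputSections.cpp hunk above) can be summarized by the following standalone sketch. It is not part of the patch; fillWithNops and NopTable are illustrative names standing in for the patch's fill() and X86_NOP_INSTRUCTIONS, and the table is assumed to be ordered by length with one entry for every length up to the longest.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

static void fillWithNops(uint8_t *Buf, size_t Size,
                         const std::vector<std::vector<uint8_t>> &NopTable) {
  size_t I = 0;
  const std::vector<uint8_t> &Longest = NopTable.back();
  // Cover as much of the gap as possible with the longest NOP.
  while (Size - I >= Longest.size()) {
    std::memcpy(Buf + I, Longest.data(), Longest.size());
    I += Longest.size();
  }
  // Cover the remainder with the single NOP whose length matches exactly.
  if (size_t Remaining = Size - I) {
    // Lengths run 1..N in order, so the NOP of length Remaining sits at
    // index Remaining - 1.
    assert(NopTable[Remaining - 1].size() == Remaining);
    std::memcpy(Buf + I, NopTable[Remaining - 1].data(), Remaining);
  }
}

int main() {
  std::vector<std::vector<uint8_t>> NopTable = {
      {0x90}, {0x66, 0x90}, {0x0f, 0x1f, 0x00}};
  uint8_t Pad[8];
  fillWithNops(Pad, sizeof(Pad), NopTable); // 3 + 3 + 2 byte NOPs
  return 0;
}
```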