diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -24,6 +24,7 @@ Pointer64, PCRel32, PCRel32GOTLoad, + PCRel32REXGOTLoad, PCRel64GOT, GOTOFF64, GOT64, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -221,6 +221,20 @@ /// phase will result in an assert/unreachable during the fixup phase RequestGOTAndTransformToDelta64FromGOT, + /// A PC-relative load of a GOT entry, relaxable if GOT entry target is + /// in-range of the fixup + /// + /// If the GOT entry target is in-range of the fixup, the GOT load may be relaxed to a direct LEA of the target (see optimize_x86_64_GOTAndStubs) + /// + /// Fixup expression: + /// Fixup <- Target - (Fixup + 4) + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + PCRel32GOTLoadRelaxable, + /// A PC-relative REX load of a GOT entry, relaxable if GOT entry target /// is in-range of the fixup. /// @@ -257,6 +271,27 @@ /// RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable, + /// A GOT entry getter/constructor, transformed to + /// PCRel32GOTLoadRelaxable pointing at the GOT entry for the original + /// target. + /// + /// Indicates that this edge should be lowered to a PCRel32GOTLoadRelaxable + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. + /// + /// Edges of this kind are usually lowered by a GOT builder pass inserted by + /// default. 
+ /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToPCRel32GOTLoadRelaxable, + /// A PC-relative REX load of a Thread Local Variable Pointer (TLVP) entry, /// relaxable if the TLVP entry target is in-range of the fixup. /// @@ -301,6 +336,14 @@ /// only. const char *getEdgeKindName(Edge::Kind K); +/// Optimize the GOT and Stub relocations if the edge target address is in range +/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range, +/// then replace GOT load with lea +/// 2. BranchPCRel32ToPtrJumpStubBypassable. For this edge kind, if the target is +/// in range, replace an indirect jump via PLT stub with a direct jump to the +/// target +Error optimize_x86_64_GOTAndStubs(LinkGraph &G); + /// Returns true if the given uint64_t value is in range for a uint32_t. inline bool isInRangeForImmU32(uint64_t Value) { return Value <= std::numeric_limits<uint32_t>::max(); @@ -341,6 +384,7 @@ case BranchPCRel32: case BranchPCRel32ToPtrJumpStub: case BranchPCRel32ToPtrJumpStubBypassable: + case PCRel32GOTLoadRelaxable: case PCRel32GOTLoadREXRelaxable: case PCRel32TLVPLoadREXRelaxable: { int64_t Value = diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -54,7 +54,9 @@ E.getKind() == x86_64::RequestGOTAndTransformToDelta64 || E.getKind() == x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable || - E.getKind() == x86_64::RequestGOTAndTransformToDelta64FromGOT; + E.getKind() == x86_64::RequestGOTAndTransformToDelta64FromGOT || + E.getKind() == x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable; } Symbol &createGOTEntry(Symbol &Target) { @@ -74,6 +76,9 @@ case 
x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable: E.setKind(x86_64::PCRel32GOTLoadREXRelaxable); break; + case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable: + E.setKind(x86_64::PCRel32GOTLoadRelaxable); + break; case x86_64::RequestGOTAndTransformToDelta64: E.setKind(x86_64::Delta64); break; @@ -149,82 +154,6 @@ const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent[6] = { 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00}; -static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) { - LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); - - for (auto *B : G.blocks()) - for (auto &E : B->edges()) - if (E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable) { - // Replace GOT load with LEA only for MOVQ instructions. - constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b}; - if (E.getOffset() < 3 || - strncmp(B->getContent().data() + E.getOffset() - 3, - reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0) - continue; - - auto &GOTBlock = E.getTarget().getBlock(); - assert(GOTBlock.getSize() == G.getPointerSize() && - "GOT entry block should be pointer sized"); - assert(GOTBlock.edges_size() == 1 && - "GOT entry should only have one outgoing edge"); - - auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - - int64_t Displacement = TargetAddr - EdgeAddr + 4; - if (Displacement >= std::numeric_limits<int32_t>::min() && - Displacement <= std::numeric_limits<int32_t>::max()) { - // Change the edge kind as we don't go through GOT anymore. This is - // for formal correctness only. Technically, the two relocation kinds - // are resolved the same way. 
- E.setKind(x86_64::Delta32); - E.setTarget(GOTTarget); - E.setAddend(E.getAddend() - 4); - auto *BlockData = reinterpret_cast<uint8_t *>( - const_cast<char *>(B->getContent().data())); - BlockData[E.getOffset() - 2] = 0x8d; - LLVM_DEBUG({ - dbgs() << " Replaced GOT load wih LEA:\n "; - printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind())); - dbgs() << "\n"; - }); - } - } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubBypassable) { - auto &StubBlock = E.getTarget().getBlock(); - assert( - StubBlock.getSize() == - sizeof(PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent) && - "Stub block should be stub sized"); - assert(StubBlock.edges_size() == 1 && - "Stub block should only have one outgoing edge"); - - auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock(); - assert(GOTBlock.getSize() == G.getPointerSize() && - "GOT block should be pointer sized"); - assert(GOTBlock.edges_size() == 1 && - "GOT block should only have one outgoing edge"); - - auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - - int64_t Displacement = TargetAddr - EdgeAddr + 4; - if (Displacement >= std::numeric_limits<int32_t>::min() && - Displacement <= std::numeric_limits<int32_t>::max()) { - E.setKind(x86_64::BranchPCRel32); - E.setTarget(GOTTarget); - LLVM_DEBUG({ - dbgs() << " Replaced stub branch with direct branch:\n "; - printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind())); - dbgs() << "\n"; - }); - } - } - - return Error::success(); -} - static const char *getELFX86_64RelocName(uint32_t Type) { switch (Type) { #define ELF_RELOC(Name, Number) \ @@ -256,8 +185,9 @@ return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer64; case ELF::R_X86_64_GOTPCREL: case ELF::R_X86_64_GOTPCRELX: - case ELF::R_X86_64_REX_GOTPCRELX: return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad; + case ELF::R_X86_64_REX_GOTPCRELX: + return 
ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32REXGOTLoad; case ELF::R_X86_64_GOTPCREL64: return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel64GOT; case ELF::R_X86_64_GOT64: @@ -371,6 +301,11 @@ Kind = x86_64::Pointer64; break; case PCRel32GOTLoad: { + Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable; + Addend = 0; + break; + } + case PCRel32REXGOTLoad: { Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable; Addend = 0; break; @@ -545,7 +480,7 @@ identifyELFSectionStartAndEndSymbols)); // Add GOT/Stubs optimizer pass. - Config.PreFixupPasses.push_back(optimizeELF_x86_64_GOTAndStubs); + Config.PreFixupPasses.push_back(x86_64::optimize_x86_64_GOTAndStubs); } if (auto Err = Ctx->modifyPassConfig(*G, Config)) @@ -563,6 +498,8 @@ return "PCRel32"; case PCRel32GOTLoad: return "PCRel32GOTLoad"; + case PCRel32REXGOTLoad: + return "PCRel32REXGOTLoad"; case PCRel64GOT: return "PCRel64GOT"; case Delta64: diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -498,79 +498,6 @@ } // namespace -static Error optimizeMachO_x86_64_GOTAndStubs(LinkGraph &G) { - LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); - - for (auto *B : G.blocks()) - for (auto &E : B->edges()) - if (E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable) { - assert(E.getOffset() >= 3 && "GOT edge occurs too early in block"); - - // Optimize GOT references. 
- auto &GOTBlock = E.getTarget().getBlock(); - assert(GOTBlock.getSize() == G.getPointerSize() && - "GOT entry block should be pointer sized"); - assert(GOTBlock.edges_size() == 1 && - "GOT entry should only have one outgoing edge"); - - auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - - // Check that this is a recognized MOV instruction. - // FIXME: Can we assume this? - constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b}; - if (strncmp(B->getContent().data() + E.getOffset() - 3, - reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0) - continue; - - int64_t Displacement = TargetAddr - EdgeAddr + 4; - if (Displacement >= std::numeric_limits<int32_t>::min() && - Displacement <= std::numeric_limits<int32_t>::max()) { - E.setTarget(GOTTarget); - E.setKind(x86_64::Delta32); - E.setAddend(E.getAddend() - 4); - char *BlockData = B->getMutableContent(G).data(); - BlockData[E.getOffset() - 2] = (char)0x8d; - LLVM_DEBUG({ - dbgs() << " Replaced GOT load wih LEA:\n "; - printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind())); - dbgs() << "\n"; - }); - } - } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubBypassable) { - auto &StubBlock = E.getTarget().getBlock(); - assert(StubBlock.getSize() == sizeof(x86_64::PointerJumpStubContent) && - "Stub block should be stub sized"); - assert(StubBlock.edges_size() == 1 && - "Stub block should only have one outgoing edge"); - - auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock(); - assert(GOTBlock.getSize() == G.getPointerSize() && - "GOT block should be pointer sized"); - assert(GOTBlock.edges_size() == 1 && - "GOT block should only have one outgoing edge"); - - auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - - int64_t Displacement = TargetAddr - EdgeAddr + 4; - if (Displacement >= 
std::numeric_limits<int32_t>::min() && - Displacement <= std::numeric_limits<int32_t>::max()) { - E.setKind(x86_64::BranchPCRel32); - E.setTarget(GOTTarget); - LLVM_DEBUG({ - dbgs() << " Replaced stub branch with direct branch:\n "; - printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind())); - dbgs() << "\n"; - }); - } - } - - return Error::success(); -} - namespace llvm { namespace jitlink { @@ -618,7 +545,7 @@ PerGraphGOTAndPLTStubsBuilder_MachO_x86_64::asPass); // Add GOT/Stubs optimizer pass. - Config.PreFixupPasses.push_back(optimizeMachO_x86_64_GOTAndStubs); + Config.PreFixupPasses.push_back(x86_64::optimize_x86_64_GOTAndStubs); } if (auto Err = Ctx->modifyPassConfig(*G, Config)) diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp --- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp @@ -50,6 +50,10 @@ return "PCRel32GOTLoadREXRelaxable"; case RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable: return "RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable"; + case PCRel32GOTLoadRelaxable: + return "PCRel32GOTLoadRelaxable"; + case RequestGOTAndTransformToPCRel32GOTLoadRelaxable: + return "RequestGOTAndTransformToPCRel32GOTLoadRelaxable"; case PCRel32TLVPLoadREXRelaxable: return "PCRel32TLVPLoadREXRelaxable"; case RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable: @@ -65,6 +69,79 @@ const char PointerJumpStubContent[6] = { static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00}; +Error optimize_x86_64_GOTAndStubs(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); + + for (auto *B : G.blocks()) + for (auto &E : B->edges()) + if (E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable) { + // Replace GOT load with LEA only for MOVQ instructions. 
+ assert(E.getOffset() >= 3 && "GOT edge occurs too early in block"); + + constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b}; + if (strncmp(B->getContent().data() + E.getOffset() - 3, + reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0) + continue; + + auto &GOTBlock = E.getTarget().getBlock(); + assert(GOTBlock.getSize() == G.getPointerSize() && + "GOT entry block should be pointer sized"); + assert(GOTBlock.edges_size() == 1 && + "GOT entry should only have one outgoing edge"); + + auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); + JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); + JITTargetAddress TargetAddr = GOTTarget.getAddress(); + + int64_t Displacement = TargetAddr - EdgeAddr + 4; + if (isInRangeForImmS32(Displacement)) { + // Change the edge kind as we don't go through GOT anymore. This is + // for formal correctness only. Technically, the two relocation kinds + // are resolved the same way. + E.setKind(x86_64::Delta32); + E.setTarget(GOTTarget); + E.setAddend(E.getAddend() - 4); + auto *BlockData = reinterpret_cast<uint8_t *>( + const_cast<char *>(B->getContent().data())); + BlockData[E.getOffset() - 2] = 0x8d; + LLVM_DEBUG({ + dbgs() << " Replaced GOT load with LEA:\n "; + printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind())); + dbgs() << "\n"; + }); + } + } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubBypassable) { + auto &StubBlock = E.getTarget().getBlock(); + assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) && + "Stub block should be stub sized"); + assert(StubBlock.edges_size() == 1 && + "Stub block should only have one outgoing edge"); + + auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock(); + assert(GOTBlock.getSize() == G.getPointerSize() && + "GOT block should be pointer sized"); + assert(GOTBlock.edges_size() == 1 && + "GOT block should only have one outgoing edge"); + + auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); + JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); + JITTargetAddress TargetAddr = 
GOTTarget.getAddress(); + + int64_t Displacement = TargetAddr - EdgeAddr + 4; + if (isInRangeForImmS32(Displacement)) { + E.setKind(x86_64::BranchPCRel32); + E.setTarget(GOTTarget); + LLVM_DEBUG({ + dbgs() << " Replaced stub branch with direct branch:\n "; + printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind())); + dbgs() << "\n"; + }); + } + } + + return Error::success(); +} + } // end namespace x86_64 } // end namespace jitlink } // end namespace llvm