diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
--- a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
@@ -124,6 +124,69 @@
   ///   - *ASSERTION* Failure to handle edges of this kind prior to the fixup
   ///     phase will result in an assert/unreachable during the fixup phase
   RequestGOTAndTransformToDelta32FromGOT,
+
+  /// A 32-bit PC-relative branch.
+  ///
+  /// Represents a PC-relative call or branch to a target. This can be used to
+  /// identify, record, and/or patch call sites.
+  ///
+  /// The fixup expression for this kind includes an implicit offset to account
+  /// for the PC (unlike the Delta edges) so that a BranchPCRel32 with a target
+  /// T and addend zero is a call/branch to the start (offset zero) of T.
+  ///
+  /// Fixup expression:
+  ///   Fixup <- Target - (Fixup + 4) + Addend : int32
+  ///
+  /// Errors:
+  ///   - The result of the fixup expression must fit into an int32, otherwise
+  ///     an out-of-range error will be returned.
+  ///
+  BranchPCRel32,
+
+  /// A 32-bit PC-relative branch to a pointer jump stub.
+  ///
+  /// The target of this relocation should be a pointer jump stub of the form:
+  ///
+  /// \code{.s}
+  ///   .text
+  ///   jmp *tgtptr
+  ///   ; ...
+  ///
+  ///   .data
+  ///   tgtptr:
+  ///     .long 0
+  /// \endcode
+  ///
+  /// This edge kind has the same fixup expression as BranchPCRel32, but further
+  /// identifies the call/branch as being to a pointer jump stub. For edges of
+  /// this kind the jump stub should not be bypassed (use
+  /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location
+  /// target may be recorded to allow manipulation at runtime.
+  ///
+  /// Fixup expression:
+  ///   Fixup <- Target - Fixup + Addend - 4 : int32
+  ///
+  /// Errors:
+  ///   - The result of the fixup expression must fit into an int32, otherwise
+  ///     an out-of-range error will be returned.
+  ///
+  BranchPCRel32ToPtrJumpStub,
+
+  /// A relaxable version of BranchPCRel32ToPtrJumpStub.
+  ///
+  /// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub,
+  /// but identifies the call/branch as being to a pointer jump stub that may be
+  /// bypassed with a direct jump to the ultimate target if the ultimate target
+  /// is within range of the fixup location.
+  ///
+  /// Fixup expression:
+  ///   Fixup <- Target - Fixup + Addend - 4 : int32
+  ///
+  /// Errors:
+  ///   - The result of the fixup expression must fit into an int32, otherwise
+  ///     an out-of-range error will be returned.
+  ///
+  BranchPCRel32ToPtrJumpStubBypassable,
 };

 /// Returns a string name for the given i386 edge. For debugging purposes
@@ -141,6 +204,12 @@
           Value <= std::numeric_limits<int16_t>::max());
 }

+/// Returns true if the given int64_t value is in range for an int32_t.
+inline bool isInRangeForImmS32(int64_t Value) {
+  return (Value >= std::numeric_limits<int32_t>::min() &&
+          Value <= std::numeric_limits<int32_t>::max());
+}
+
 /// Apply fixup expression for edge to block content.
 inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
                         const Symbol *GOTSymbol) {
@@ -202,6 +271,15 @@
     break;
   }

+  case i386::BranchPCRel32:
+  case i386::BranchPCRel32ToPtrJumpStub:
+  case i386::BranchPCRel32ToPtrJumpStubBypassable: {
+    int32_t Value =
+        E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
+    *(little32_t *)FixupPtr = Value;
+    break;
+  }
+
   default:
     return make_error<JITLinkError>(
         "In graph " + G.getName() + ", section " + B.getSection().getName() +
@@ -217,6 +295,13 @@
 /// i386 null pointer content.
 extern const char NullPointerContent[PointerSize];

+/// i386 pointer jump stub content.
+///
+/// Contains the instruction sequence for an indirect jump via an in-memory
+/// pointer:
+///   jmp *ptr
+extern const char PointerJumpStubContent[6];
+
 /// Creates a new pointer block in the given section and returns an anonymous
 /// symbol pointing to it.
/// @@ -237,6 +322,36 @@ return G.addAnonymousSymbol(B, 0, PointerSize, false, false); } +/// Create a jump stub block that jumps via the pointer at the given symbol. +/// +/// The stub block will have the following default values: +/// alignment: 8-bit +/// alignment-offset: 0 +/// address: highest allowable: (~5U) +inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection, + Symbol &PointerSymbol) { + auto &B = G.createContentBlock(StubSection, PointerJumpStubContent, + orc::ExecutorAddr(), 8, 0); + B.addEdge(Pointer32, + // Offset is 2 because the the first 2 bytes of the + // jump stub block are {0xff, 0x25} -- an indirect absolute + // jump. + 2, PointerSymbol, 0); + return B; +} + +/// Create a jump stub that jumps via the pointer at the given symbol and +/// an anonymous symbol pointing to it. Return the anonymous symbol. +/// +/// The stub block will be created by createPointerJumpStubBlock. +inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, + Section &StubSection, + Symbol &PointerSymbol) { + return G.addAnonymousSymbol( + createPointerJumpStubBlock(G, StubSection, PointerSymbol), 0, 6, true, + false); +} + /// Global Offset Table Builder. class GOTTableManager : public TableManager { public: @@ -283,6 +398,54 @@ Section *GOTSection = nullptr; }; +/// Procedure Linkage Table Builder. +class PLTTableManager : public TableManager { +public: + PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {} + + static StringRef getSectionName() { return "$__STUBS"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getKind() == i386::BranchPCRel32 && !E.getTarget().isDefined()) { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to + // be optimized when the target is in-range. 
+      E.setKind(i386::BranchPCRel32ToPtrJumpStubBypassable);
+      E.setTarget(getEntryForTarget(G, E.getTarget()));
+      return true;
+    }
+    return false;
+  }
+
+  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    return createAnonymousPointerJumpStub(G, getStubsSection(G),
+                                          GOT.getEntryForTarget(G, Target));
+  }
+
+public:
+  Section &getStubsSection(LinkGraph &G) {
+    if (!PLTSection)
+      PLTSection = &G.createSection(getSectionName(),
+                                    orc::MemProt::Read | orc::MemProt::Exec);
+    return *PLTSection;
+  }
+
+  GOTTableManager &GOT;
+  Section *PLTSection = nullptr;
+};
+
+/// Optimize the GOT and Stub relocations if the edge target address is in range
+/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range,
+/// then replace GOT load with lea. (THIS IS UNIMPLEMENTED RIGHT NOW!)
+/// 2. BranchPCRel32ToPtrJumpStubBypassable. For this edge kind, if the target
+/// is in range, replace an indirect jump via the PLT stub with a direct jump
+/// to the target.
+Error optimizeGOTAndStubAccesses(LinkGraph &G);
+
 } // namespace llvm::jitlink::i386

 #endif // LLVM_EXECUTIONENGINE_JITLINK_I386_H
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -30,7 +30,8 @@
   LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");

   i386::GOTTableManager GOT;
-  visitExistingEdges(G, GOT);
+  i386::PLTTableManager PLT(GOT);
+  visitExistingEdges(G, GOT, PLT);
   return Error::success();
 }
 } // namespace
@@ -130,6 +131,8 @@
       return EdgeKind_i386::Delta32;
     case ELF::R_386_GOTOFF:
       return EdgeKind_i386::Delta32FromGOT;
+    case ELF::R_386_PLT32:
+      return EdgeKind_i386::BranchPCRel32;
     }

     return make_error<JITLinkError>("Unsupported i386 relocation:" +
@@ -243,8 +246,11 @@
     else
       Config.PrePrunePasses.push_back(markAllSymbolsLive);

-    // Add an in-place GOT build pass.
+    // Add an in-place GOT and PLT build pass.
Config.PostPrunePasses.push_back(buildTables_ELF_i386); + + // Add GOT/Stubs optimizer pass. + Config.PreFixupPasses.push_back(i386::optimizeGOTAndStubAccesses); } if (auto Err = Ctx->modifyPassConfig(*G, Config)) return Ctx->notifyFailed(std::move(Err)); diff --git a/llvm/lib/ExecutionEngine/JITLink/i386.cpp b/llvm/lib/ExecutionEngine/JITLink/i386.cpp --- a/llvm/lib/ExecutionEngine/JITLink/i386.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/i386.cpp @@ -34,10 +34,58 @@ return "Delta32FromGOT"; case RequestGOTAndTransformToDelta32FromGOT: return "RequestGOTAndTransformToDelta32FromGOT"; + case BranchPCRel32: + return "BranchPCRel32"; + case BranchPCRel32ToPtrJumpStub: + return "BranchPCRel32ToPtrJumpStub"; + case BranchPCRel32ToPtrJumpStubBypassable: + return "BranchPCRel32ToPtrJumpStubBypassable"; } return getGenericEdgeKindName(K); } const char NullPointerContent[PointerSize] = {0x00, 0x00, 0x00, 0x00}; + +const char PointerJumpStubContent[6] = { + static_cast(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00}; + +Error optimizeGOTAndStubAccesses(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); + + for (auto *B : G.blocks()) + for (auto &E : B->edges()) { + if (E.getKind() == i386::BranchPCRel32ToPtrJumpStubBypassable) { + auto &StubBlock = E.getTarget().getBlock(); + assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) && + "Stub block should be stub sized"); + assert(StubBlock.edges_size() == 1 && + "Stub block should only have one outgoing edge"); + + auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock(); + assert(GOTBlock.getSize() == G.getPointerSize() && + "GOT block should be pointer sized"); + assert(GOTBlock.edges_size() == 1 && + "GOT block should only have one outgoing edge"); + + auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); + orc::ExecutorAddr EdgeAddr = B->getAddress() + E.getOffset(); + orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); + + int64_t Displacement = TargetAddr - EdgeAddr + 4; + if 
(isInRangeForImmS32(Displacement)) { + E.setKind(i386::BranchPCRel32); + E.setTarget(GOTTarget); + LLVM_DEBUG({ + dbgs() << " Replaced stub branch with direct branch:\n "; + printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind())); + dbgs() << "\n"; + }); + } + } + } + + return Error::success(); +} + } // namespace llvm::jitlink::i386 diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations.s b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations.s rename to llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s --- a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s @@ -1,11 +1,11 @@ # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc -triple=i386-unknown-linux-gnu -position-independent \ -# RUN: -filetype=obj -o %t/elf_sm_pic_reloc.o %s +# RUN: -filetype=obj -o %t/elf_sm_pic_reloc_got.o %s # RUN: llvm-jitlink -noexec \ # RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ -# RUN: -check %s %t/elf_sm_pic_reloc.o +# RUN: -check %s %t/elf_sm_pic_reloc_got.o # -# Test ELF small/PIC relocations. +# Test ELF small/PIC GOT relocations. .text .globl main @@ -19,11 +19,11 @@ # Test GOT32 handling. # # We want to check both the offset to the GOT entry and its contents. 
-# jitlink-check: decode_operand(test_got, 4) = got_addr(elf_sm_pic_reloc.o, named_data1) - _GLOBAL_OFFSET_TABLE_
-# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc.o, named_data1)) = named_data1
+# jitlink-check: decode_operand(test_got, 4) = got_addr(elf_sm_pic_reloc_got.o, named_data1) - _GLOBAL_OFFSET_TABLE_
+# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc_got.o, named_data1)) = named_data1
 #
-# jitlink-check: decode_operand(test_got+6, 4) = got_addr(elf_sm_pic_reloc.o, named_data2) - _GLOBAL_OFFSET_TABLE_
-# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc.o, named_data2)) = named_data2
+# jitlink-check: decode_operand(test_got+6, 4) = got_addr(elf_sm_pic_reloc_got.o, named_data2) - _GLOBAL_OFFSET_TABLE_
+# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc_got.o, named_data2)) = named_data2

         .globl test_got
         .p2align 4, 0x90
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s
@@ -0,0 +1,38 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: llvm-mc -triple=i386-unknown-linux-gnu -position-independent \
+# RUN:     -filetype=obj -o %t/elf_sm_pic_reloc_plt.o %s
+# RUN: llvm-jitlink -noexec \
+# RUN:     -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \
+# RUN:     -abs external_func=0xffff0010 \
+# RUN:     -check %s %t/elf_sm_pic_reloc_plt.o
+#
+# Test ELF small/PIC PLT relocations.
+
+# Empty main entry point.
+        .text
+        .globl main
+        .p2align 4, 0x90
+        .type main,@function
+main:
+        ret
+        .size main, .-main
+
+# Check R_386_PLT32 handling with a call to an external function via PLT.
+# This produces a BranchPCRel32 edge whose PLT stub is bypassed, so the call
+# is resolved like a direct PC-relative branch to the target.
+#
+# NOTE - For ELF/i386 we always optimize away the PLT calls as the
+# displacement between the target address and the edge address always
+# fits in an int32_t. Regardless, we always create the PLT stub and GOT entry
+# for position independent code, first, as there may be future use-cases
+# where we would want to disable the optimization.
+#
+# jitlink-check: decode_operand(test_call_extern_plt, 0) = external_func - next_pc(test_call_extern_plt)
+# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc_plt.o, external_func)) = external_func
+        .globl test_call_extern_plt
+        .p2align 4, 0x90
+        .type test_call_extern_plt,@function
+test_call_extern_plt:
+        call external_func@plt
+
+        .size test_call_extern_plt, .-test_call_extern_plt