diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -45,6 +45,13 @@ class Triple; class Twine; class Value; +class MCDisassembler; +class MCInstrAnalysis; + +namespace jitlink { +class LinkGraph; +class Symbol; +} // namespace jitlink namespace orc { @@ -557,6 +564,33 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, ValueToValueMapTy &VMap); +/// Introduce relocations to \p Sym in its own definition if there are any +/// pointers formed via PC-relative address that do not already have a +/// relocation. +/// +/// This is useful when introducing indirection via a stub function at link time +/// without compiler support. If a function pointer is formed without a +/// relocation, e.g. in the definition of \c foo +/// +/// \code +/// _foo: +/// leaq -7(%rip), rax # form pointer to _foo without relocation +/// _bar: +/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo' +/// \endcode +/// +/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we +/// introduce a stub for _foo. If the pointer is used as a key, this may be +/// observable to the program. This pass will attempt to introduce the missing +/// "self-relocation" on the leaq instruction. +/// +/// This is based on disassembly and should be considered "best effort". It may +/// silently fail to add relocations. +Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA); + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -158,6 +158,11 @@ evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const; + /// Given an instruction with a memory operand that could require relocation, + /// returns the offset within the instruction of that relocation. + virtual Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const; + /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. virtual std::vector> findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -51,6 +51,7 @@ OrcShared OrcTargetProcess MC + MCDisassembler Passes RuntimeDyld Support diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -9,12 +9,17 @@ #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" #include +#define DEBUG_TYPE "orc" + using namespace llvm; using namespace llvm::orc; @@ -372,5 +377,77 @@ Dst.addModuleFlag(MapMetadata(MF, VMap)); } +Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA) { + // AArch64 appears to already come with the necessary relocations. Among other + // architectures, only x86_64 is currently implemented here. + if (G.getTargetTriple().getArch() != Triple::x86_64) + return Error::success(); + + raw_null_ostream CommentStream; + auto &STI = Disassembler.getSubtargetInfo(); + + // Determine the function bounds + auto &B = Sym.getBlock(); + assert(!B.isZeroFill() && "expected content block"); + auto SymAddress = Sym.getAddress(); + auto SymStartInBlock = + (const uint8_t *)B.getContent().data() + Sym.getOffset(); + auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset(); + auto Content = makeArrayRef(SymStartInBlock, SymSize); + + LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n"); + + SmallDenseSet ExistingRelocations; + for (auto &E : B.edges()) { + if (E.isRelocation()) + ExistingRelocations.insert(E.getOffset()); + } + + size_t I = 0; + while (I < Content.size()) { + MCInst Instr; + uint64_t InstrSize = 0; + uint64_t InstrStart = SymAddress + I; + auto DecodeStatus = Disassembler.getInstruction( + Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); + if (DecodeStatus != MCDisassembler::Success) { + LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address " + << InstrStart); + return make_error( + formatv("failed to disassemble at address {0:x16}", InstrStart), + inconvertibleErrorCode()); + } + // Advance to the next instruction. + I += InstrSize; + + // Check for a PC-relative address equal to the symbol itself. + auto PCRelAddr = + MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); + if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) + continue; + + auto RelocOffInInstr = + MIA.getMemoryOperandRelocationOffset(Instr, InstrSize); + if (!RelocOffInInstr.hasValue() || + InstrSize - RelocOffInInstr.getValue() != 4) { + LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at " + << InstrStart); + continue; + } + + auto RelocOffInBlock = + InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); + if (ExistingRelocations.contains(RelocOffInBlock)) + continue; + + LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart); + B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4); + } + return Error::success(); +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -34,3 +34,9 @@ uint64_t Size) const { return None; } + +Optional +MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + return None; +} \ No newline at end of file diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -408,6 +408,9 @@ const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const override; + Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const override; }; #define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS @@ -557,6 +560,30 @@ return None; } +Optional +X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + if (Inst.getOpcode() != X86::LEA64r) + return None; + const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); + if (MemOpStart == -1) + return None; + MemOpStart += X86II::getOperandBias(MCID); + const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); + const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); + const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg); + const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt); + const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); + // Must be a simple rip-relative address. + if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 || + IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm()) + return None; + // rip-relative ModR/M immediate is 32 bits. + assert(Size > 4 && "invalid instruction size for rip-relative lea"); + return Size - 4; +} + } // end of namespace X86_MC } // end of namespace llvm diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s @@ -0,0 +1,35 @@ + .section __TEXT,__text,regular,pure_instructions + + .globl _form_func_ptr +_form_func_ptr: + leaq _form_func_ptr(%rip), %rax + leaq _other(%rip), %rax + leaq _form_func_ptr(%rip), %rax + nop + leaq _form_func_ptr(%rip), %rax + retq + + .globl _other +_other: + leaq _form_func_ptr(%rip), %rax + retq + +# Return 0 if the pointers formed inside and outside the function are the same. + .globl _main +_main: + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + movl $0, -4(%rbp) + callq _form_func_ptr + movq %rax, -16(%rbp) + callq _other + movq %rax, -24(%rbp) + movq -16(%rbp), %rax + cmpq -24(%rbp), %rax + setne %al + andb $1, %al + movzbl %al, %eax + addq $32, %rsp + popq %rbp + retq \ No newline at end of file diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -noexec -show-graph %t 2>&1 | \ +# RUN: FileCheck -check-prefix=WITHOUT %s +# RUN: llvm-jitlink -noexec -show-graph -add-self-relocations %t 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK %s + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _form_func_ptr +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# WITHOUT-NOT: kind = + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _form_func_ptr +# CHECK-NEXT: edges +# CHECK-NEXT: (block + 0x00000003), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# CHECK-NEXT: (block + 0x00000011), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x00000019), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NOT: kind = + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _other +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: kind = Delta32, target = _form_func_ptr + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _other +# CHECK-NEXT: edges +# CHECK-NEXT: kind = Delta32, target = _form_func_ptr diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -show-graph %t +# RUN: llvm-jitlink -show-graph -add-self-relocations %t + +# Ensure that the added relocation does not create an incorrect pointer. + +# Execution test +# REQUIRES: system-darwin && native && target-x86_64 diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -21,6 +21,7 @@ #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" @@ -28,6 +29,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -178,6 +180,11 @@ OrcRuntime("orc-runtime", cl::desc("Use ORC runtime from given path"), cl::init(""), cl::cat(JITLinkCategory)); +static cl::opt AddSelfRelocations( + "add-self-relocations", + cl::desc("Add relocations to function pointers to the current function"), + cl::init(false), cl::cat(JITLinkCategory)); + ExitOnError ExitOnErr; LLVM_ATTRIBUTE_USED void linkComponents() { @@ -194,6 +201,8 @@ UseTestResultOverride = true; } +static Error addSelfRelocations(LinkGraph &G); + namespace llvm { static raw_ostream & @@ -1086,6 +1095,9 @@ dumpSectionContents(outs(), G); return Error::success(); }); + + if (AddSelfRelocations) + PassConfig.PostPrunePasses.push_back(addSelfRelocations); } Expected Session::findFileInfo(StringRef FileName) { @@ -1374,14 +1386,21 @@ return Error::success(); } -static Error runChecks(Session &S) { - const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); - - if (CheckFiles.empty()) - return Error::success(); - - LLVM_DEBUG(dbgs() << "Running checks...\n"); +namespace { +struct TargetInfo { + const Target *TheTarget; + std::unique_ptr STI; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + std::unique_ptr Disassembler; + std::unique_ptr MII; + std::unique_ptr MIA; + std::unique_ptr InstPrinter; +}; +} // anonymous namespace +static TargetInfo getTargetInfo(const Triple &TT) { auto TripleName = TT.str(); std::string ErrorStr; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, ErrorStr); @@ -1412,19 +1431,49 @@ TripleName, inconvertibleErrorCode())); - MCContext Ctx(Triple(TripleName), MAI.get(), MRI.get(), STI.get()); + auto Ctx = std::make_unique(Triple(TripleName), MAI.get(), + MRI.get(), STI.get()); std::unique_ptr Disassembler( - TheTarget->createMCDisassembler(*STI, Ctx)); + TheTarget->createMCDisassembler(*STI, *Ctx)); if (!Disassembler) ExitOnErr(make_error("Unable to create disassembler for " + TripleName, inconvertibleErrorCode())); std::unique_ptr MII(TheTarget->createMCInstrInfo()); + if (!MII) + ExitOnErr(make_error("Unable to create instruction info for" + + TripleName, + inconvertibleErrorCode())); + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + if (!MIA) + ExitOnErr(make_error( + "Unable to create instruction analysis for" + TripleName, + inconvertibleErrorCode())); std::unique_ptr InstPrinter( TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI)); + if (!InstPrinter) + ExitOnErr(make_error( + "Unable to create instruction printer for" + TripleName, + inconvertibleErrorCode())); + return {TheTarget, std::move(STI), std::move(MRI), + std::move(MAI), std::move(Ctx), std::move(Disassembler), + std::move(MII), std::move(MIA), std::move(InstPrinter)}; +} + +static Error runChecks(Session &S) { + const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); + + if (CheckFiles.empty()) + return Error::success(); + + LLVM_DEBUG(dbgs() << "Running checks...\n"); + + auto TI = getTargetInfo(TT); auto IsSymbolValid = [&S](StringRef Symbol) { return S.isSymbolRegistered(Symbol); @@ -1448,8 +1497,8 @@ RuntimeDyldChecker Checker( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo, - TT.isLittleEndian() ? support::little : support::big, Disassembler.get(), - InstPrinter.get(), dbgs()); + TT.isLittleEndian() ? support::little : support::big, + TI.Disassembler.get(), TI.InstPrinter.get(), dbgs()); std::string CheckLineStart = "# " + CheckName + ":"; for (auto &CheckFile : CheckFiles) { @@ -1463,6 +1512,16 @@ return Error::success(); } +static Error addSelfRelocations(LinkGraph &G) { + auto TI = getTargetInfo(G.getTargetTriple()); + for (auto *Sym : G.defined_symbols()) + if (Sym->isCallable()) + if (auto Err = addFunctionPointerRelocationsToCurrentSymbol( + *Sym, G, *TI.Disassembler, *TI.MIA)) + return Err; + return Error::success(); +} + static void dumpSessionStats(Session &S) { if (!ShowSizes) return;