diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -773,6 +773,9 @@ if (isExportedToOtherDSO(ESym)) Result |= SymbolRef::SF_Exported; + if (ESym->getType() == ELF::STT_GNU_IFUNC) + Result |= SymbolRef::SF_Indirect; + if (ESym->getVisibility() == ELF::STV_HIDDEN) Result |= SymbolRef::SF_Hidden; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -310,9 +310,12 @@ << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)Addr) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations. - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, Addr, *JITSymFlags); + // Skip absolute symbol relocations. + if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, Addr, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } else if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data || SymType == object::SymbolRef::ST_Unknown || @@ -344,9 +347,12 @@ << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, SectOffset, *JITSymFlags); + // Skip absolute symbol relocations. 
+ if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, SectOffset, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } } @@ -632,6 +638,11 @@ RWDataAlign = std::max(RWDataAlign, CommonAlign); } + if (!CodeSectionSizes.empty()) { + // Add 64 bytes for a potential IFunc resolver stub + CodeSectionSizes.push_back(64); + } + // Compute the required allocation space for each different type of sections // (code, read-only data, read-write data) assuming that all sections are // allocated with the max alignment. Note that we cannot compute with the diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -158,6 +158,40 @@ // Map between GOT relocation value and corresponding GOT offset std::map GOTOffsetMap; + /// The ID of the current IFunc stub section + unsigned IFuncStubSectionID = 0; + /// The current offset into the IFunc stub section + uint64_t IFuncStubOffset = 0; + + /// An IFunc stub and its original symbol + struct IFuncStub { + /// The offset of this stub in the IFunc stub section + uint64_t StubOffset; + /// The symbol table entry of the original symbol + SymbolTableEntry OriginalSymbol; + }; + + /// The IFunc stubs + SmallVector IFuncStubs; + + /// Create the code for the IFunc resolver at the given address. This code + /// works together with the stubs created in createIFuncStub() to call the + /// resolver function and then jump to the real function address. + /// It must not be larger than 64B. + void createIFuncResolver(uint8_t *Addr) const; + /// Create the code for an IFunc stub for the IFunc that is defined in + /// section IFuncSectionID at offset IFuncOffset. 
The IFunc resolver created + /// by createIFuncResolver() is defined in the section IFuncStubSectionID at + /// offset IFuncResolverOffset. The code should be written into the section + /// with the id IFuncStubSectionID at the offset IFuncStubOffset. + void createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, uint64_t IFuncStubOffset, + unsigned IFuncSectionID, uint64_t IFuncOffset); + /// Return the maximum size of a stub created by createIFuncStub() + unsigned getMaxIFuncStubSize() const; + + void processNewSymbol(const SymbolRef &ObjSymbol, + SymbolTableEntry &Entry) override; bool relocationNeedsGot(const RelocationRef &R) const override; bool relocationNeedsStub(const RelocationRef &R) const override; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -2292,18 +2292,75 @@ return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } +void RuntimeDyldELF::processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Symbol) { + // This should never return an error as `processNewSymbol` wouldn't have been + // called if getFlags() returned an error before. + auto ObjSymbolFlags = cantFail(ObjSymbol.getFlags()); + + if (ObjSymbolFlags & SymbolRef::SF_Indirect) { + if (IFuncStubSectionID == 0) { + // Create a dummy section for the ifunc stubs. It will be actually + // allocated in finalizeLoad() below. + IFuncStubSectionID = Sections.size(); + Sections.push_back( + SectionEntry(".text.__llvm_IFuncStubs", nullptr, 0, 0, 0)); + // First 64B are reserved for the IFunc resolver + IFuncStubOffset = 64; + } + + IFuncStubs.push_back(IFuncStub{IFuncStubOffset, Symbol}); + // Modify the symbol so that it points to the ifunc stub instead of to the + // resolver function. 
+ Symbol = SymbolTableEntry(IFuncStubSectionID, IFuncStubOffset, + Symbol.getFlags()); + IFuncStubOffset += getMaxIFuncStubSize(); + } +} + Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { if (IsMipsO32ABI) if (!PendingRelocs.empty()) return make_error("Can't find matching LO16 reloc"); + // Create the IFunc stubs if necessary. This must be done before processing + // the GOT entries, as the IFunc stubs may create some. + if (IFuncStubSectionID != 0) { + uint8_t *IFuncStubsAddr = MemMgr.allocateCodeSection( + IFuncStubOffset, 1, IFuncStubSectionID, ".text.__llvm_IFuncStubs"); + if (!IFuncStubsAddr) + return make_error( + "Unable to allocate memory for IFunc stubs!"); + Sections[IFuncStubSectionID] = + SectionEntry(".text.__llvm_IFuncStubs", IFuncStubsAddr, IFuncStubOffset, + IFuncStubOffset, 0); + + createIFuncResolver(IFuncStubsAddr); + + LLVM_DEBUG(dbgs() << "Creating IFunc stubs SectionID: " + << IFuncStubSectionID << " Addr: " + << Sections[IFuncStubSectionID].getAddress() << '\n'); + for (auto &IFuncStub : IFuncStubs) { + auto &Symbol = IFuncStub.OriginalSymbol; + LLVM_DEBUG(dbgs() << "\tSectionID: " << Symbol.getSectionID() + << " Offset: " << format("%p", Symbol.getOffset()) + << " IFuncStubOffset: " + << format("%p\n", IFuncStub.StubOffset)); + createIFuncStub(IFuncStubSectionID, 0, IFuncStub.StubOffset, + Symbol.getSectionID(), Symbol.getOffset()); + } + + IFuncStubSectionID = 0; + IFuncStubOffset = 0; + IFuncStubs.clear(); + } + // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), - GOTSectionID, ".got", false); + GOTSectionID, ".got", false); if (!Addr) return make_error("Unable to allocate memory for GOT!"); @@ -2326,7 +2383,7 @@ section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = 
SectionMap.find(*RelocatedSection); - assert (i != SectionMap.end()); + assert(i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; } } @@ -2362,6 +2419,110 @@ return Obj.isELF(); } +void RuntimeDyldELF::createIFuncResolver(uint8_t *Addr) const { + if (Arch == Triple::x86_64) { + // The address of the GOT1 entry is in %r11, the GOT2 entry is in %r11+8 + // (see createIFuncStub() for details) + // The following code first saves all registers that contain the original + // function arguments as those registers are not saved by the resolver + // function. %r11 is saved as well so that the GOT1 entry can be updated + // afterwards. Then it calls the actual IFunc resolver function whose + // address is stored in GOT2. After the resolver function returns, all + // saved registers are restored and the return value is written to GOT1. + // Finally, jump to the now resolved function. + // clang-format off + const uint8_t StubCode[] = { + 0x57, // push %rdi + 0x56, // push %rsi + 0x52, // push %rdx + 0x51, // push %rcx + 0x41, 0x50, // push %r8 + 0x41, 0x51, // push %r9 + 0x41, 0x53, // push %r11 + 0x41, 0xff, 0x53, 0x08, // call *0x8(%r11) + 0x41, 0x5b, // pop %r11 + 0x41, 0x59, // pop %r9 + 0x41, 0x58, // pop %r8 + 0x59, // pop %rcx + 0x5a, // pop %rdx + 0x5e, // pop %rsi + 0x5f, // pop %rdi + 0x49, 0x89, 0x03, // mov %rax,(%r11) + 0xff, 0xe0 // jmp *%rax + }; + // clang-format on + static_assert(sizeof(StubCode) <= 64, + "maximum size of the IFunc resolver is 64B"); + memcpy(Addr, StubCode, sizeof(StubCode)); + } else { + report_fatal_error( + "IFunc resolver is not supported for target architecture"); + } +} + +void RuntimeDyldELF::createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, + uint64_t IFuncStubOffset, + unsigned IFuncSectionID, + uint64_t IFuncOffset) { + auto &IFuncStubSection = Sections[IFuncStubSectionID]; + auto *Addr = IFuncStubSection.getAddressWithOffset(IFuncStubOffset); + + if (Arch == Triple::x86_64) { + // The 
first instruction loads a PC-relative address into %r11 which is a + // GOT entry for this stub. This initially contains the address to the + // IFunc resolver. We can use %r11 here as it's caller saved but not used + // to pass any arguments. In fact, x86_64 ABI even suggests using %r11 for + // code in the PLT. The IFunc resolver will use %r11 to update the GOT + // entry. + // + // The next instruction just jumps to the address contained in the GOT + // entry. As mentioned above, we do this two-step jump by first setting + // %r11 so that the IFunc resolver has access to it. + // + // The IFunc resolver of course also needs to know the actual address of + // the actual IFunc resolver function. This will be stored in a GOT entry + // right next to the first one for this stub. So, the IFunc resolver will + // be able to call it with %r11+8. + // + // In total, two adjacent GOT entries (+relocation) and one additional + // relocation are required: + // GOT1: Address of the IFunc resolver. + // GOT2: Address of the IFunc resolver function. + // IFuncStubOffset+3: 32-bit PC-relative address of GOT1. + uint64_t GOT1 = allocateGOTEntries(2); + uint64_t GOT2 = GOT1 + getGOTEntrySize(); + + RelocationEntry RE1(GOTSectionID, GOT1, ELF::R_X86_64_64, + IFuncResolverOffset, {}); + addRelocationForSection(RE1, IFuncStubSectionID); + RelocationEntry RE2(GOTSectionID, GOT2, ELF::R_X86_64_64, IFuncOffset, {}); + addRelocationForSection(RE2, IFuncSectionID); + + const uint8_t StubCode[] = { + 0x4c, 0x8d, 0x1d, 0x00, 0x00, 0x00, 0x00, // leaq 0x0(%rip),%r11 + 0x41, 0xff, 0x23 // jmpq *(%r11) + }; + assert(sizeof(StubCode) <= getMaxIFuncStubSize() && + "IFunc stub size must not exceed getMaxIFuncStubSize()"); + memcpy(Addr, StubCode, sizeof(StubCode)); + + // The PC-relative value starts 4 bytes from the end of the leaq + // instruction, so the addend is -4. 
+ resolveGOTOffsetRelocation(IFuncStubSectionID, IFuncStubOffset + 3, + GOT1 - 4, ELF::R_X86_64_PC32); + } else { + report_fatal_error("IFunc stub is not supported for target architecture"); + } +} + +unsigned RuntimeDyldELF::getMaxIFuncStubSize() const { + if (Arch == Triple::x86_64) { + return 10; + } + return 0; +} + bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { unsigned RelTy = R.getType(); if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -435,6 +435,10 @@ // Return size of Global Offset Table (GOT) entry virtual size_t getGOTEntrySize() { return 0; } + // Hook for the subclasses to do further processing when a symbol is added to + // the global symbol table. This function may modify the symbol table entry. + virtual void processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Entry) {} + // Return true if the relocation R may require allocating a GOT entry. 
virtual bool relocationNeedsGot(const RelocationRef &R) const { return false; diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s @@ -0,0 +1,109 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: split-file %s %t +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/test_runner.o %t/test_runner.s +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/func_defs.o %t/func_defs.s +# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -verify -check=%s %t/test_runner.o %t/func_defs.o +# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -execute %t/test_runner.o %t/func_defs.o + +#--- test_runner.s + +# The _main function of this file contains calls to the two external functions +# "indirect_func" and "normal_func" that are not yet defined. They are called via +# the PLT to simulate how a compiler would emit a call to an external function. +# Eventually, indirect_func will resolve to a STT_GNU_IFUNC and normal_func to a +# regular function. We include calls to both types of functions in this test to +# test that both types of functions are executed correctly when their types are +# not known initially. +# It also contains a call to a locally defined indirect function. As RuntimeDyld +# treats local functions a bit differently than external functions, we also test +# that. +# Verify that the functions return the expected value. If the external indirect +# function call fails, this returns the error code 1. If the external normal +# function call fails, it's the error code 2. If the call to the locally +# defined indirect function fails, return the error code 3. 
+ +local_real_func: + mov $0x56, %eax + ret + +local_indirect_func_resolver: + lea local_real_func(%rip), %rax + ret + + .type local_indirect_func, @gnu_indirect_function + .set local_indirect_func, local_indirect_func_resolver + + .global _main +_main: + call indirect_func@plt + cmp $0x12, %eax + je 1f + mov $1, %eax + ret +1: + + call normal_func@plt + cmp $0x34, %eax + je 1f + mov $2, %eax + ret +1: + + call local_indirect_func@plt + cmp $0x56, %eax + je 1f + mov $3, %eax + ret +1: + + xor %eax, %eax + ret + +# Test that the indirect functions have the same addresses in both calls. +# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2) +test_indirect_func_address_1: + lea indirect_func(%rip), %rax + +test_indirect_func_address_2: + lea indirect_func(%rip), %rax + +# rtdyld-check: decode_operand(test_local_indirect_func_address_1, 4) + next_pc(test_local_indirect_func_address_1) = decode_operand(test_local_indirect_func_address_2, 4) + next_pc(test_local_indirect_func_address_2) +test_local_indirect_func_address_1: + lea local_indirect_func(%rip), %rax + +test_local_indirect_func_address_2: + lea local_indirect_func(%rip), %rax + +#--- func_defs.s + +# This file contains the external functions that are called above. The type of +# the indirect function is set to @gnu_indirect_function and its value is set +# to the value of ifunc_resolver. This is what gcc emits when using +# __attribute__((ifunc("ifunc_resolver"))) in C. The resolver function just +# returns the address of the real function "real_func". +# To test that everything works correctly, the indirect function returns 0x12 +# and the direct function returns 0x34. This is verified in the _main function +# above. 
+ +real_func: + mov $0x12, %eax + ret + +ifunc_resolver: + lea real_func(%rip), %rax + ret + + .global indirect_func + .type indirect_func, @gnu_indirect_function + .set indirect_func, ifunc_resolver + + .global normal_func +normal_func: + mov $0x34, %eax + ret + +# Test that the address of the indirect function is equal even when it is +# defined in another object file. +# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_3, 4) + next_pc(test_indirect_func_address_3) +test_indirect_func_address_3: + lea indirect_func(%rip), %rax