Index: compiler-rt/lib/orc/CMakeLists.txt =================================================================== --- compiler-rt/lib/orc/CMakeLists.txt +++ compiler-rt/lib/orc/CMakeLists.txt @@ -87,7 +87,10 @@ LINK_LIBS ${ORC_LINK_LIBS} PARENT_TARGET orc) else() # not Apple - add_asm_sources(ORC_ASM_SOURCES elfnix_tls.x86-64.S) + add_asm_sources(ORC_ASM_SOURCES + elfnix_tls.x86-64.S + elfnix_tls.aarch64.S + ) foreach(arch ${ORC_SUPPORTED_ARCH}) if(NOT CAN_TARGET_${arch}) Index: compiler-rt/lib/orc/elfnix_platform.cpp =================================================================== --- compiler-rt/lib/orc/elfnix_platform.cpp +++ compiler-rt/lib/orc/elfnix_platform.cpp @@ -63,11 +63,17 @@ return Error::success(); } + struct TLSInfoEntry { unsigned long Key = 0; unsigned long DataAddress = 0; }; +struct TLSDescriptor { + void (*Resolver)(void *); + TLSInfoEntry *InfoEntry; +}; + class ELFNixPlatformRuntimeState { private: struct AtExitEntry { @@ -501,6 +507,11 @@ reinterpret_cast(static_cast(D->DataAddress))); } +ORC_RT_INTERFACE void * +___orc_rt_elfnix_tlsdesc_resolver_impl(TLSDescriptor *D) { + return __orc_rt_elfnix_tls_get_addr_impl(D->InfoEntry); +} + ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult __orc_rt_elfnix_create_pthread_key(char *ArgData, size_t ArgSize) { return WrapperFunction(void)>::handle( Index: compiler-rt/lib/orc/elfnix_tls.aarch64.S =================================================================== --- /dev/null +++ compiler-rt/lib/orc/elfnix_tls.aarch64.S @@ -0,0 +1,92 @@ +//===-- elfnix_tls.aarch64.S ---------------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of the ORC runtime support library. 
+// +//===----------------------------------------------------------------------===// + +// The content of this file is aarch64-only +#if defined(__arm64__) || defined(__aarch64__) +// Save area is 768 bytes: x1-x30 at [sp+16, sp+256), q0-q31 at [sp+256, sp+768). +#define REGISTER_SAVE_SPACE_SIZE 32 * 24 + + .text + + // Takes the TLS descriptor pointer in x0 and returns the TLV address in x0; x1-x30 and q0-q31 are saved before and restored after the call below. + .globl ___orc_rt_elfnix_tlsdesc_resolver +___orc_rt_elfnix_tlsdesc_resolver: + sub sp, sp, #REGISTER_SAVE_SPACE_SIZE + stp x29, x30, [sp, #16 * 1] + stp x27, x28, [sp, #16 * 2] + stp x25, x26, [sp, #16 * 3] + stp x23, x24, [sp, #16 * 4] + stp x21, x22, [sp, #16 * 5] + stp x19, x20, [sp, #16 * 6] + stp x17, x18, [sp, #16 * 7] + stp x15, x16, [sp, #16 * 8] + stp x13, x14, [sp, #16 * 9] + stp x11, x12, [sp, #16 * 10] + stp x9, x10, [sp, #16 * 11] + stp x7, x8, [sp, #16 * 12] + stp x5, x6, [sp, #16 * 13] + stp x3, x4, [sp, #16 * 14] + stp x1, x2, [sp, #16 * 15] + stp q30, q31, [sp, #32 * 8] + stp q28, q29, [sp, #32 * 9] + stp q26, q27, [sp, #32 * 10] + stp q24, q25, [sp, #32 * 11] + stp q22, q23, [sp, #32 * 12] + stp q20, q21, [sp, #32 * 13] + stp q18, q19, [sp, #32 * 14] + stp q16, q17, [sp, #32 * 15] + stp q14, q15, [sp, #32 * 16] + stp q12, q13, [sp, #32 * 17] + stp q10, q11, [sp, #32 * 18] + stp q8, q9, [sp, #32 * 19] + stp q6, q7, [sp, #32 * 20] + stp q4, q5, [sp, #32 * 21] + stp q2, q3, [sp, #32 * 22] + stp q0, q1, [sp, #32 * 23] + + bl ___orc_rt_elfnix_tlsdesc_resolver_impl + + ldp q0, q1, [sp, #32 * 23] + ldp q2, q3, [sp, #32 * 22] + ldp q4, q5, [sp, #32 * 21] + ldp q6, q7, [sp, #32 * 20] + ldp q8, q9, [sp, #32 * 19] + ldp q10, q11, [sp, #32 * 18] + ldp q12, q13, [sp, #32 * 17] + ldp q14, q15, [sp, #32 * 16] + ldp q16, q17, [sp, #32 * 15] + ldp q18, q19, [sp, #32 * 14] + ldp q20, q21, [sp, #32 * 13] + ldp q22, q23, [sp, #32 * 12] + ldp q24, q25, [sp, #32 * 11] + ldp q26, q27, [sp, #32 * 10] + ldp q28, q29, [sp, #32 * 9] + ldp q30, q31, [sp, #32 * 8] + ldp x1, x2, [sp, #16 * 15] + ldp x3, x4, [sp, #16 * 14] + ldp x5, x6, [sp, #16 * 13] + ldp x7, x8, [sp, #16 * 12] 
+ ldp x9, x10, [sp, #16 * 11] + ldp x11, x12, [sp, #16 * 10] + ldp x13, x14, [sp, #16 * 9] + ldp x15, x16, [sp, #16 * 8] + ldp x17, x18, [sp, #16 * 7] + ldp x19, x20, [sp, #16 * 6] + ldp x21, x22, [sp, #16 * 5] + ldp x23, x24, [sp, #16 * 4] + ldp x25, x26, [sp, #16 * 3] + ldp x27, x28, [sp, #16 * 2] + ldp x29, x30, [sp, #16 * 1] + add sp, sp, #REGISTER_SAVE_SPACE_SIZE + ret + +#endif // defined(__arm64__) || defined(__aarch64__) Index: compiler-rt/test/orc/TestCases/Linux/aarch64/trivial-tls.S =================================================================== --- /dev/null +++ compiler-rt/test/orc/TestCases/Linux/aarch64/trivial-tls.S @@ -0,0 +1,66 @@ +// RUN: %clang -c -o %t %s +// RUN: %llvm_jitlink %t +// +// Test that basic ELF TLS works by adding together TLSs with values +// 0, 1, and -1, and returning the result (0 for success). This setup +// tests both zero-initialized (.tbss) and non-zero-initialized +// (.tdata) sections. + + .text + .file "tlstest.cpp" + .globl main + .p2align 2 + .type main,@function +main: + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + adrp x0, :tlsdesc:x + ldr x1, [x0, :tlsdesc_lo12:x] + add x0, x0, :tlsdesc_lo12:x + .tlsdesccall x + blr x1 + mrs x8, TPIDR_EL0 + ldr w9, [x8, x0] + adrp x0, :tlsdesc:y + ldr x1, [x0, :tlsdesc_lo12:y] + add x0, x0, :tlsdesc_lo12:y + .tlsdesccall y + blr x1 + ldr w10, [x8, x0] + add w9, w10, w9 + adrp x0, :tlsdesc:z + ldr x1, [x0, :tlsdesc_lo12:z] + add x0, x0, :tlsdesc_lo12:z + .tlsdesccall z + blr x1 + ldr w8, [x8, x0] + add w0, w9, w8 + ldp x29, x30, [sp], #16 + ret +.Lfunc_end0: + .size main, .Lfunc_end0-main + + .type x,@object + .section .tdata,"awT",@progbits + .globl x + .p2align 2 +x: + .word 4294967295 + .size x, 4 + + .type y,@object + .section .tbss,"awT",@nobits + .globl y + .p2align 2 +y: + .word 0 + .size y, 4 + + .type z,@object + .section .tdata,"awT",@progbits + .globl z + .p2align 2 +z: + .word 1 + .size z, 4 + \ No newline at end of file Index: llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h =================================================================== --- llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h +++ llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -22,7 +22,8 @@ namespace aarch64 { enum EdgeKind_aarch64 : Edge::Kind { - Branch26 = Edge::FirstRelocation, + Nop = Edge::FirstRelocation, + Branch26, Pointer32, Pointer64, Pointer64Anon, @@ -31,8 +32,8 @@ MoveWide16, GOTPage21, GOTPageOffset12, - TLVPage21, - TLVPageOffset12, + TLSDescPage21, + TLSDescPageOffset12, PointerToGOT, PairedAddend, LDRLiteral19, @@ -222,9 +223,12 @@ *(little64_t *)FixupPtr = Value; break; } - case TLVPage21: + case Nop: { + break; + } + case TLSDescPage21: + case TLSDescPageOffset12: case GOTPage21: - case TLVPageOffset12: case GOTPageOffset12: case PointerToGOT: { return make_error( @@ -257,13 +261,11 @@ const char *FixupPtr = BlockWorkingMem + E.getOffset(); switch (E.getKind()) { - case aarch64::GOTPage21: - case aarch64::TLVPage21: { + case aarch64::GOTPage21: { KindToSet = aarch64::Page21; break; } - case 
aarch64::GOTPageOffset12: - case aarch64::TLVPageOffset12: { + case aarch64::GOTPageOffset12: { KindToSet = aarch64::PageOffset12; uint32_t RawInstr = *(const support::ulittle32_t *)FixupPtr; (void)RawInstr; Index: llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp =================================================================== --- llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -64,6 +64,10 @@ ELFPrel64, ELFAdrGOTPage21, ELFLd64GOTLo12, + ELFTLSDescAdrPage21, + ELFTLSDescAddLo12, + ELFTLSDescLd64Lo12, + ELFTLSDescCall, }; static Expected @@ -105,6 +109,14 @@ return ELFAdrGOTPage21; case ELF::R_AARCH64_LD64_GOT_LO12_NC: return ELFLd64GOTLo12; + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: + return ELFTLSDescAdrPage21; + case ELF::R_AARCH64_TLSDESC_ADD_LO12: + return ELFTLSDescAddLo12; + case ELF::R_AARCH64_TLSDESC_LD64_LO12: + return ELFTLSDescLd64Lo12; + case ELF::R_AARCH64_TLSDESC_CALL: + return ELFTLSDescCall; } return make_error( @@ -293,6 +305,22 @@ Kind = aarch64::GOTPageOffset12; break; } + case ELFTLSDescAdrPage21: { + Kind = aarch64::TLSDescPage21; + break; + } + case ELFTLSDescAddLo12: { + Kind = aarch64::TLSDescPageOffset12; + break; + } + case ELFTLSDescLd64Lo12: { + Kind = aarch64::TLSDescPageOffset12; + break; + } + case ELFTLSDescCall: { + Kind = aarch64::Nop; + break; + } }; Edge GE(Kind, Offset, *GraphSymbol, Addend); @@ -343,6 +371,14 @@ return "ELFAdrGOTPage21"; case ELFLd64GOTLo12: return "ELFLd64GOTLo12"; + case ELFTLSDescAdrPage21: + return "ELFTLSDescAdrPage21"; + case ELFTLSDescAddLo12: + return "ELFTLSDescAddLo12"; + case ELFTLSDescLd64Lo12: + return "ELFTLSDescLd64Lo12"; + case ELFTLSDescCall: + return "ELFTLSDescCall"; default: return getGenericEdgeKindName(static_cast(R)); } @@ -355,12 +391,210 @@ aarch64::getEdgeKindName) {} }; +// TLS Info Builder: emits one 16-byte {pthread key, data address} entry per TLS symbol into the $__TLSINFO section. 
+class TLSInfoTableManager_ELF_aarch64
+    : public TableManager<TLSInfoTableManager_ELF_aarch64> {
+public:
+  static StringRef getSectionName() { return "$__TLSINFO"; }
+  // Zero-filled template for one entry: {pthread key, data address}.
+  static const uint8_t TLSInfoEntryContent[16];
+  // No edge is rewritten here; entries are only made via getEntryForTarget.
+  bool visitEdge(LinkGraph &G, Block *B, Edge &E) { return false; }
+
+  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    // The entry's key word is written later by the platform's TLV fix-up pass
+    // (fixTLVSectionsAndEdges looks this section up), so use mutable content.
+    auto &TLSInfoEntry = G.createMutableContentBlock(
+        getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()),
+        orc::ExecutorAddr(), 8, 0);
+    TLSInfoEntry.addEdge(aarch64::Pointer64, 8, Target, 0); // data address
+    return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false);
+  }
+
+private:
+  Section &getTLSInfoSection(LinkGraph &G) {
+    if (!TLSInfoTable) // created lazily, on the first entry
+      TLSInfoTable = &G.createSection(getSectionName(), MemProt::Read);
+    return *TLSInfoTable;
+  }
+
+  ArrayRef<char> getTLSInfoEntryContent() const {
+    return {reinterpret_cast<const char *>(TLSInfoEntryContent),
+            sizeof(TLSInfoEntryContent)};
+  }
+
+  Section *TLSInfoTable = nullptr;
+};
+
+const uint8_t TLSInfoTableManager_ELF_aarch64::TLSInfoEntryContent[16] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /*data address*/
+};
+
+// TLS Descriptor Builder: one {resolver, TLS info pointer} entry per symbol.
+class TLSDescTableManager_ELF_aarch64
+    : public TableManager<TLSDescTableManager_ELF_aarch64> {
+public:
+  TLSDescTableManager_ELF_aarch64(
+      TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager)
+      : TLSInfoTableManager(TLSInfoTableManager) {}
+
+  static StringRef getSectionName() { return "$__TLSDESC"; }
+  // Zero-filled template for one entry: {resolver pointer, TLS info pointer}.
+  static const uint8_t TLSDescEntryContent[16];
+  // Retargets TLSDesc* edges at a descriptor entry; ignores all other kinds.
+  bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+    Edge::Kind KindToSet = Edge::Invalid;
+    switch (E.getKind()) {
+    case aarch64::TLSDescPage21: {
+      KindToSet = aarch64::Page21;
+      break;
+    }
+    case aarch64::TLSDescPageOffset12: {
+      KindToSet = aarch64::PageOffset12;
+      break;
+    }
+    default:
+      return false;
+    }
+    assert(KindToSet != Edge::Invalid &&
+           "Fell through switch, but no new kind to set");
+    DEBUG_WITH_TYPE("jitlink", {
+      dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+             << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+             << formatv("{0:x}", E.getOffset()) << ")\n";
+    });
+    E.setKind(KindToSet);
+    E.setTarget(getEntryForTarget(G, E.getTarget()));
+    return true;
+  }
+  // Creates Target's descriptor: resolver at offset 0, TLS info at offset 8.
+  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    auto &EntryBlock =
+        G.createContentBlock(getTLSDescSection(G), getTLSDescBlockContent(),
+                             orc::ExecutorAddr(), 8, 0);
+    EntryBlock.addEdge(aarch64::Pointer64, 0, getTLSDescResolver(G), 0);
+    EntryBlock.addEdge(aarch64::Pointer64, 8,
+                       TLSInfoTableManager.getEntryForTarget(G, Target), 0);
+    return G.addAnonymousSymbol(EntryBlock, 0, 16, false, false);
+  }
+
+private:
+  Section &getTLSDescSection(LinkGraph &G) {
+    if (!TLSDescSection)
+      TLSDescSection = &G.createSection(getSectionName(), MemProt::Read);
+    return *TLSDescSection;
+  }
+
+  Symbol &getTLSDescResolver(LinkGraph &G) {
+    if (!TLSDescResolver) // ELFNixPlatform renames this external symbol
+      TLSDescResolver =
+          &G.addExternalSymbol("__tlsdesc_resolver", 8, Linkage::Strong);
+    return *TLSDescResolver;
+  }
+
+  ArrayRef<char> getTLSDescBlockContent() const {
+    return {reinterpret_cast<const char *>(TLSDescEntryContent),
+            sizeof(TLSDescEntryContent)};
+  }
+
+  Section *TLSDescSection = nullptr;
+  Symbol *TLSDescResolver = nullptr;
+  TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager;
+};
+
+const uint8_t TLSDescTableManager_ELF_aarch64::TLSDescEntryContent[16] = {
+    0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, /*resolver function pointer*/
+    0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00 /*pointer to tls info*/
+};
+
+// TLS descriptors are used by default for implementing TLVs on aarch64 *nix.
+// With TLS descriptors, the linker is in charge of returning the offset of a
+// thread-local variable's allocation from the base of the TLS block. When
+// accessing TLVs, codegen emits code that reads the thread pointer from the
+// TPIDR_EL0 system register and adds the offset supplied by the linker via
+// relocation. glibc and ld-linux are responsible for allocating the TLS block
+// and pointing the thread pointer at it. However, there is currently no public
+// libc API for dynamically adding new allocations to this TLS block.
+//
+// In order to support TLS descriptors, we simply iterate through instructions
+// of the blocks and do the following instruction patching.
+//
+// replace mrs <reg>, TPIDR_EL0 -> mov <reg>, xzr
+//
+// This way, tp + offset will become just offset. We can return the actual
+// address of TLVs as the offset after this.
+class ThreadPointerNullifier_aarch64 {
+public:
+  ThreadPointerNullifier_aarch64() = default;
+  // Pass entry point: patches every block that carries a TLS descriptor edge.
+  Error operator()(LinkGraph &G) {
+    LLVM_DEBUG(dbgs() << "Nullifying thread pointers:\n");
+
+    for (auto *B : G.blocks()) {
+      bool HasTLV = llvm::any_of(B->edges(), [](auto &E) {
+        return E.getKind() == aarch64::TLSDescPage21 ||
+               E.getKind() == aarch64::TLSDescPageOffset12;
+      });
+      // Skip the blocks that don't contain TLV access pattern
+      if (HasTLV) {
+        if (auto Err = processBlock(G, *B)) {
+          return Err;
+        }
+      }
+    }
+
+    return Error::success();
+  }
+
+private:
+  Error processBlock(LinkGraph &G, Block &B) {
+    using support::ulittle32_t;
+    LLVM_DEBUG(dbgs() << " Iterating instructions at "
+                      << B.getSection().getName() << " + "
+                      << formatv("{0:x16}", B.getAddress()) << "\n");
+    BinaryStreamReader BlockReader(
+        StringRef(B.getContent().data(), B.getContent().size()),
+        G.getEndianness());
+    // Writes go via the mutable view; a trailing partial word is never decoded.
+    char *BlockContentBase = B.getMutableContent(G).data();
+    while (BlockReader.bytesRemaining() >= sizeof(uint32_t)) {
+      uint64_t Offset = BlockReader.getOffset();
+
+      uint32_t Instr;
+      if (auto Err = BlockReader.readInteger(Instr))
+        return Err;
+      constexpr uint32_t MRSTPIDRInstr = 0xd53bd040; // mrs x0, TPIDR_EL0
+      constexpr uint32_t MRSTPIDRMask = 0xffffffe0;  // all bits but Rt[4:0]
+      constexpr uint32_t MovXzrInstr = 0xaa1f03e0;   // mov x0, xzr
+      if ((Instr & MRSTPIDRMask) == MRSTPIDRInstr) {
+        LLVM_DEBUG(dbgs() << " Patching MRS instruction at "
+                          << B.getSection().getName() << " + "
+                          << formatv("{0:x16}", B.getAddress() + Offset)
+                          << "\n");
+
+        const uint32_t DstReg = Instr & 0x1f;
+        const uint32_t NewInstr = MovXzrInstr | DstReg;
+        ulittle32_t *TargetInstr =
+            reinterpret_cast<ulittle32_t *>(BlockContentBase + Offset);
+        *TargetInstr = NewInstr;
+      }
+    }
+
+    return Error::success();
+  }
+};
+
 Error buildTables_ELF_aarch64(LinkGraph &G) { LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); aarch64::GOTTableManager GOT; aarch64::PLTTableManager PLT(GOT); - visitExistingEdges(G, GOT, PLT); + TLSInfoTableManager_ELF_aarch64 TLSInfo; + TLSDescTableManager_ELF_aarch64 TLSDesc(TLSInfo); + visitExistingEdges(G, GOT, PLT, TLSDesc, TLSInfo); 
return Error::success(); } @@ -407,7 +641,8 @@ else Config.PrePrunePasses.push_back(markAllSymbolsLive); - // Add an in-place GOT/Stubs build pass. + // Add an in-place GOT/TLS/Stubs build pass. + Config.PostPrunePasses.push_back(ThreadPointerNullifier_aarch64()); Config.PostPrunePasses.push_back(buildTables_ELF_aarch64); } Index: llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp =================================================================== --- llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -380,9 +380,8 @@ if (*MachORelocKind == MachOPage21) { Kind = aarch64::Page21; - } else if (*MachORelocKind == MachOTLVPage21) { - Kind = aarch64::TLVPage21; - } else if (*MachORelocKind == MachOGOTPage21) { + } else if (*MachORelocKind == MachOTLVPage21 || + *MachORelocKind == MachOGOTPage21) { Kind = aarch64::GOTPage21; } break; @@ -412,11 +411,7 @@ "immediate instruction with a zero " "addend"); - if (*MachORelocKind == MachOTLVPageOffset12) { - Kind = aarch64::TLVPageOffset12; - } else if (*MachORelocKind == MachOGOTPageOffset12) { - Kind = aarch64::GOTPageOffset12; - } + Kind = aarch64::GOTPageOffset12; break; } case MachOPointerToGOT: Index: llvm/lib/ExecutionEngine/JITLink/aarch64.cpp =================================================================== --- llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -44,10 +44,10 @@ return "GOTPage21"; case GOTPageOffset12: return "GOTPageOffset12"; - case TLVPage21: - return "TLVPage21"; - case TLVPageOffset12: - return "TLVPageOffset12"; + case TLSDescPage21: + return "TLSDescPage21"; + case TLSDescPageOffset12: + return "TLSDescPageOffset12"; case PointerToGOT: return "PointerToGOT"; case PairedAddend: Index: llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp =================================================================== --- llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ 
llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -839,11 +839,13 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges( jitlink::LinkGraph &G, JITDylib &JD) { - // TODO implement TLV support - for (auto *Sym : G.external_symbols()) + for (auto *Sym : G.external_symbols()) { if (Sym->getName() == "__tls_get_addr") { Sym->setName("___orc_rt_elfnix_tls_get_addr"); + } else if (Sym->getName() == "__tlsdesc_resolver") { + Sym->setName("___orc_rt_elfnix_tlsdesc_resolver"); } + } auto *TLSInfoEntrySection = G.findSectionByName("$__TLSINFO"); Index: llvm/test/ExecutionEngine/JITLink/AArch64/ELF_aarch64_tp_nullifier.s =================================================================== --- /dev/null +++ llvm/test/ExecutionEngine/JITLink/AArch64/ELF_aarch64_tp_nullifier.s @@ -0,0 +1,37 @@ +# RUN: llvm-mc -triple=aarch64-unknown-linux-gnu -relax-relocations=false \ +# RUN: -position-independent -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs __tlsdesc_resolver=0xcafef00d -check %s %t.o + .text + + .globl main + .p2align 2 + .type main,@function +main: + ret + + .size main, .-main + +# Check that the ThreadPointerNullifier pass works, i.e. that +# mrs <reg>, TPIDR_EL0 is replaced with mov <reg>, xzr +# +# jitlink-check: *{4}(tlv_block + 0) = 0xaa1f03e8 +# mov x8, xzr + .globl tlv_block + .p2align 2 + .type tlv_block,@function +tlv_block: + mrs x8, TPIDR_EL0 + adrp x0, :tlsdesc:i + ldr x1, [x0, :tlsdesc_lo12:i] + add x0, x0, :tlsdesc_lo12:i + .tlsdesccall i + blr x1 + add x0, x8, x0 + ret + .size tlv_block, .-tlv_block + + .globl i + .p2align 2 +i: + .word 0 + .size i, 4