Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -329,6 +329,9 @@ /// The default TLS model to use. ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel) +/// Bit size of immediate TLS offsets (0 == use the default). +VALUE_CODEGENOPT(TLSSize, 8, 0) + /// Number of path components to strip when emitting checks. (0 == full /// filename) VALUE_CODEGENOPT(EmitCheckPathComponentsToStrip, 32, 0) Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2167,6 +2167,9 @@ def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>; def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[DriverOption]>; def mcmodel_EQ : Joined<["-"], "mcmodel=">, Group; +def mtls_size_EQ : Joined<["-"], "mtls-size=">, Group, Flags<[DriverOption, CC1Option]>, + HelpText<"Specify bit size of immediate TLS offsets (AArch64 ELF only): " + "12 (for 4KB) | 24 (for 16MB, default) | 32 (for 4GB) | 48 (for 256TB, needs -mcmodel=large)">; def mimplicit_it_EQ : Joined<["-"], "mimplicit-it=">, Group; def mdefault_build_attributes : Joined<["-"], "mdefault-build-attributes">, Group; def mno_default_build_attributes : Joined<["-"], "mno-default-build-attributes">, Group; Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -470,6 +470,7 @@ Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.TLSSize = CodeGenOpts.TLSSize; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; Options.ExplicitEmulatedTLS = 
CodeGenOpts.ExplicitEmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4528,6 +4528,19 @@ CmdArgs.push_back(A->getValue()); } + if (Arg *A = Args.getLastArg(options::OPT_mtls_size_EQ)) { + StringRef Value = A->getValue(); + unsigned TLSSize = 0; + Value.getAsInteger(10, TLSSize); + if (!Triple.isAArch64() || !Triple.isOSBinFormatELF()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getOption().getName() << TripleStr; + if (TLSSize != 12 && TLSSize != 24 && TLSSize != 32 && TLSSize != 48) + D.Diag(diag::err_drv_invalid_int_value) + << A->getOption().getName() << Value; + Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ); + } + // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false); if (!CPU.empty()) { Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1264,6 +1264,8 @@ } } + Opts.TLSSize = getLastArgIntValue(Args, OPT_mtls_size_EQ, 0, Diags); + if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) { StringRef Val = A->getValue(); Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val); Index: clang/test/Driver/tls-size.c =================================================================== --- /dev/null +++ clang/test/Driver/tls-size.c @@ -0,0 +1,26 @@ +// Options for AArch64 ELF +// RUN: %clang -### -target aarch64-linux-gnu -mtls-size=12 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-12 %s +// RUN: %clang -### -target aarch64-linux-gnu -mtls-size=24 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-24 %s +// RUN: %clang -### -target aarch64-linux-gnu -mtls-size=32 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-32 %s +// RUN: %clang -### 
-target aarch64-linux-gnu -mtls-size=48 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-48 %s + +// Unsupported target +// RUN: not %clang -target aarch64-unknown-windows-msvc -mtls-size=24 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=UNSUPPORTED-TARGET %s +// RUN: not %clang -target x86_64-linux-gnu -mtls-size=24 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=UNSUPPORTED-TARGET %s + +// Invalid option value +// RUN: not %clang -target aarch64-linux-gnu -mtls-size=0 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=INVALID-VALUE %s + +// CHECK-12: "-cc1" {{.*}}"-mtls-size=12" +// CHECK-24: "-cc1" {{.*}}"-mtls-size=24" +// CHECK-32: "-cc1" {{.*}}"-mtls-size=32" +// CHECK-48: "-cc1" {{.*}}"-mtls-size=48" +// UNSUPPORTED-TARGET: error: unsupported option +// INVALID-VALUE: error: invalid integral value Index: llvm/include/llvm/CodeGen/CommandFlags.inc =================================================================== --- llvm/include/llvm/CodeGen/CommandFlags.inc +++ llvm/include/llvm/CodeGen/CommandFlags.inc @@ -238,6 +238,10 @@ cl::desc("Emit functions into separate sections"), cl::init(false)); +static cl::opt TLSSize("tls-size", + cl::desc("Bit size of immediate TLS offsets"), + cl::init(0)); + static cl::opt EmulatedTLS("emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false)); @@ -305,6 +309,7 @@ Options.DataSections = DataSections; Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; + Options.TLSSize = TLSSize; Options.EmulatedTLS = EmulatedTLS; Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0; Options.ExceptionModel = ExceptionModel; Index: llvm/include/llvm/Target/TargetOptions.h =================================================================== --- llvm/include/llvm/Target/TargetOptions.h +++ llvm/include/llvm/Target/TargetOptions.h @@ -231,6 +231,9 @@ /// noreturn calls, even if TrapUnreachable is true. 
unsigned NoTrapAfterNoreturn : 1; + /// Bit size of immediate TLS offsets (0 == use the default). + unsigned TLSSize : 8; + /// EmulatedTLS - This flag enables emulated TLS model, using emutls /// function in the runtime library.. unsigned EmulatedTLS : 1; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -686,6 +686,8 @@ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, + const SDLoc &DL, SelectionDAG &DAG) const; SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, SelectionDAG &DAG) const; SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4529,6 +4529,97 @@ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1)); } +/// Convert a thread-local variable reference into a sequence of instructions to +/// compute the variable's address for the local exec TLS model of ELF targets. +/// The sequence depends on the maximum TLS area size. 
+SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
+                                                    SDValue ThreadBase,
+                                                    const SDLoc &DL,
+                                                    SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue TPOff, Addr;
+
+  switch (DAG.getTarget().Options.TLSSize) {
+  default: // User-settable via llc -tls-size, so diagnose in all builds.
+    report_fatal_error("Unsupported TLS size (expected 12, 24, 32 or 48)");
+
+  case 12: { // 4KiB TLS area: a single ADD with a 12-bit immediate.
+    // mrs   x0, TPIDR_EL0
+    // add   x0, x0, :tprel_lo12:a
+    SDValue Var = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
+    return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+                                      Var,
+                                      DAG.getTargetConstant(0, DL, MVT::i32)),
+                   0);
+  }
+
+  case 24: { // 16MiB TLS area (default): two ADDs, high then low 12 bits.
+    // mrs   x0, TPIDR_EL0
+    // add   x0, x0, :tprel_hi12:a
+    // add   x0, x0, :tprel_lo12_nc:a
+    SDValue HiVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
+    SDValue LoVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0,
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+                                      HiVar,
+                                      DAG.getTargetConstant(0, DL, MVT::i32)),
+                   0);
+    return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
+                                      LoVar,
+                                      DAG.getTargetConstant(0, DL, MVT::i32)),
+                   0);
+  }
+
+  case 32: { // 4GiB TLS area: MOVZ/MOVK build a 32-bit offset, then ADD.
+    // mrs   x1, TPIDR_EL0
+    // movz  x0, #:tprel_g1:a
+    // movk  x0, #:tprel_g0_nc:a
+    // add   x0, x1, x0
+    SDValue HiVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+    SDValue LoVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0,
+        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+                                       DAG.getTargetConstant(16, DL, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
+                                       DAG.getTargetConstant(0, DL, MVT::i32)),
+                    0);
+    return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+  }
+
+  case 48: { // 256TiB TLS area: MOVZ/MOVK/MOVK build a 48-bit offset.
+    // mrs   x1, TPIDR_EL0
+    // movz  x0, #:tprel_g2:a
+    // movk  x0, #:tprel_g1_nc:a
+    // movk  x0, #:tprel_g0_nc:a
+    // add   x0, x1, x0
+    SDValue HiVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
+    SDValue MiVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0,
+        AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
+    SDValue LoVar = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0,
+        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+                                       DAG.getTargetConstant(32, DL, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
+                                       DAG.getTargetConstant(16, DL, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
+                                       DAG.getTargetConstant(0, DL, MVT::i32)),
+                    0);
+    return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+  }
+  }
+}
+
 /// When accessing thread-local variables under either the general-dynamic or
 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
@@ -4566,15 +4657,7 @@
 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
   assert(Subtarget->isTargetELF() && "This function expects an ELF target");
-  if (getTargetMachine().getCodeModel() == CodeModel::Large)
-    report_fatal_error("ELF TLS only supported in small memory model");
-  // Different choices can be made for the maximum size of the TLS area for a
-  // module. For the small address model, the default TLS size is 16MiB and the
-  // maximum TLS size is 4GiB.
-  // FIXME: add -mtls-size command line option and make it control the 16MiB
-  // vs. 4GiB code sequence generation.
-  // FIXME: add tiny codemodel support. We currently generate the same code as
-  // small, which may be larger than needed.
+
   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -4584,6 +4667,17 @@
       Model = TLSModel::GeneralDynamic;
   }
 
+  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+      Model != TLSModel::LocalExec)
+    report_fatal_error("ELF TLS only supported in small memory model or "
+                       "in local exec TLS model");
+  // Different choices can be made for the maximum size of the TLS area for a
+  // module. For the small address model, the default TLS size is 16MiB and the
+  // maximum TLS size is 4GiB.
+  // FIXME: add tiny and large code model support for TLS access models other
+  // than local exec. We currently generate the same code as small for tiny,
+  // which may be larger than needed.
+
   SDValue TPOff;
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDLoc DL(Op);
@@ -4592,23 +4686,7 @@
   SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
 
   if (Model == TLSModel::LocalExec) {
-    SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
-    SDValue LoVar = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0,
-        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    SDValue TPWithOff_lo =
-        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
-                                   HiVar,
-                                   DAG.getTargetConstant(0, DL, MVT::i32)),
-                0);
-    SDValue TPWithOff =
-        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
-                                   LoVar,
-                                   DAG.getTargetConstant(0, DL, MVT::i32)),
-                0);
-    return TPWithOff;
+    return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
   } else if (Model == TLSModel::InitialExec) {
     TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
     TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@
-288,6 +288,17 @@ this->Options.TrapUnreachable = true; } + if (this->Options.TLSSize == 0) // default + this->Options.TLSSize = 24; + if ((getCodeModel() == CodeModel::Small || + getCodeModel() == CodeModel::Kernel) && + this->Options.TLSSize > 32) + // for the small (and kernel) code model, the maximum TLS size is 4GiB + this->Options.TLSSize = 32; + else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24) + // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB) + this->Options.TLSSize = 24; + // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is // MachO/CodeModel::Large, which GlobalISel does not support. if (getOptLevel() <= EnableGlobalISelAtO && Index: llvm/test/CodeGen/AArch64/arm64-tls-initial-exec.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-tls-initial-exec.ll +++ llvm/test/CodeGen/AArch64/arm64-tls-initial-exec.ll @@ -49,48 +49,3 @@ ; CHECK-TINY-RELOC: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 } - -@local_exec_var = thread_local(localexec) global i32 0 - -define i32 @test_local_exec() { -; CHECK-LABEL: test_local_exec: - %val = load i32, i32* @local_exec_var - -; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 -; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var -; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var -; CHECK: ldr w0, [x[[R3]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 -; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC - -; CHECK-TINY: mrs x[[R1:[0-9]+]], TPIDR_EL0 -; CHECK-TINY: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var -; CHECK-TINY: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var -; CHECK-TINY: ldr w0, [x[[R3]]] - -; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 -; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC - ret i32 %val -} - -define i32* @test_local_exec_addr() { -; CHECK-LABEL: test_local_exec_addr: - ret i32* @local_exec_var - -; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 -; CHECK: 
add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
-; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
-; CHECK: ret
-
-; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
-; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
-
-; CHECK-TINY: mrs x[[R1:[0-9]+]], TPIDR_EL0
-; CHECK-TINY: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
-; CHECK-TINY: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
-; CHECK-TINY: ret
-
-; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
-; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
-}
Index: llvm/test/CodeGen/AArch64/arm64-tls-local-exec.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/arm64-tls-local-exec.ll
@@ -0,0 +1,109 @@
+; Each assembly RUN line also enables the plain CHECK prefix so that the
+; CHECK-LABEL directives below are matched and anchor the checks per function.
+;
+; Test each TLS size option
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -tls-size=12 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-12
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -tls-size=12 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-12-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny -tls-size=24 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-24
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny -tls-size=24 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small -tls-size=32 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-32
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=large -tls-size=48 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-48
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=large -tls-size=48 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-48-RELOC %s
+;
+; Test the maximum TLS size for each code model (fallback to a smaller size from the specified size)
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -tls-size=32 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-32
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny -tls-size=32 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-24
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small -tls-size=48 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-32
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small -tls-size=48 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
+;
+; Test the default TLS size for each code model
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck --check-prefixes=CHECK,CHECK-24 %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny < %s | FileCheck %s --check-prefixes=CHECK,CHECK-24
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small < %s | FileCheck %s --check-prefixes=CHECK,CHECK-24
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=large < %s | FileCheck %s --check-prefixes=CHECK,CHECK-24
+; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=large | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
+
+@local_exec_var = thread_local(localexec) global i32 0
+
+define i32 @test_local_exec() {
+; CHECK-LABEL: test_local_exec:
+  %val = load i32, i32* @local_exec_var
+
+; CHECK-12: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-12: add x[[R2:[0-9]+]], x[[R1]], :tprel_lo12:local_exec_var
+; CHECK-12: ldr w0, [x[[R2]]]
+
+; CHECK-12-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12
+
+; CHECK-24: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-24: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK-24: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK-24: ldr w0, [x[[R3]]]
+
+; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+
+; CHECK-32: movz x[[R2:[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK-32: movk x[[R2]], #:tprel_g0_nc:local_exec_var
+; CHECK-32: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-32: ldr w0, [x[[R1]], x[[R2]]]
+
+; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+; CHECK-48: movz x[[R2:[0-9]+]], #:tprel_g2:local_exec_var
+; CHECK-48: movk x[[R2]], #:tprel_g1_nc:local_exec_var
+; CHECK-48: movk x[[R2]], #:tprel_g0_nc:local_exec_var
+; CHECK-48: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-48: ldr w0, [x[[R1]], x[[R2]]]
+
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G2
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+  ret i32 %val
+}
+
+define i32* @test_local_exec_addr() {
+; CHECK-LABEL: test_local_exec_addr:
+  ret i32* @local_exec_var
+
+; CHECK-12: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-12: add x0, x[[R1]], :tprel_lo12:local_exec_var
+; CHECK-12: ret
+
+; CHECK-12-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12
+
+; CHECK-24: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-24: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK-24: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK-24: ret
+
+; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+
+; CHECK-32: movz x[[R2:[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK-32: movk x[[R2]], #:tprel_g0_nc:local_exec_var
+; CHECK-32: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-32: add x0, x[[R1]], x[[R2]]
+; CHECK-32: ret
+
+; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+; CHECK-48: movz x[[R2:[0-9]+]], #:tprel_g2:local_exec_var
+; CHECK-48: movk x[[R2]], #:tprel_g1_nc:local_exec_var
+; CHECK-48: movk x[[R2]], #:tprel_g0_nc:local_exec_var
+; CHECK-48: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK-48: add x0, x[[R1]], x[[R2]]
+; CHECK-48: ret
+
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G2
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+}