Index: llvm/lib/Target/M68k/M68kExpandPseudo.cpp =================================================================== --- llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -81,6 +81,23 @@ default: return false; + case M68k::Pseudo_TLSGetAddr: + BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ea)) + .addUse(M68k::SP) + .add(MI.getOperand(0)); + BuildMI(MBB, MBBI, DL, TII->get(M68k::BSR32)) + .addExternalSymbol("__tls_get_addr", M68kII::MO_PLT); + BuildMI(MBB, MBBI, DL, TII->get(M68k::ADD32ai)) + .addDef(M68k::SP) + .addUse(M68k::SP) + .addImm(4); + MBBI->eraseFromParent(); + return true; + case M68k::Pseudo_TLSReadTp: + BuildMI(MBB, MBBI, DL, TII->get(M68k::CALLb)) + .addExternalSymbol("__m68k_read_tp"); + MBBI->eraseFromParent(); + return true; case M68k::MOVXd16d8: return TII->ExpandMOVX_RR(MIB, MVT::i16, MVT::i8); case M68k::MOVXd32d8: Index: llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -226,7 +226,6 @@ bool SelectAL(SDNode *Parent, SDValue N, SDValue &Sym); bool SelectPCD(SDNode *Parent, SDValue N, SDValue &Imm); bool SelectPCI(SDNode *Parent, SDValue N, SDValue &Imm, SDValue &Index); - // If Address Mode represents Frame Index store FI in Disp and // Displacement bit size in Base. 
These values are read symmetrically by // M68kRegisterInfo::eliminateFrameIndex method @@ -497,6 +496,12 @@ return true; } break; + case ISD::TargetGlobalTLSAddress: { + auto *GA = cast(N); + AM.GV = GA->getGlobal(); + AM.SymbolFlags = GA->getTargetFlags(); + return true; + } } return matchAddressBase(N, AM); @@ -663,9 +668,30 @@ default: break; + case ISD::GLOBAL_OFFSET_TABLE: { + auto GOT = CurDAG->getTargetExternalSymbol("_GLOBAL_OFFSET_TABLE_", + MVT::i32, M68kII::MO_GOTPCREL); + auto *Res = CurDAG->getMachineNode(M68k::LEA32q, DL, MVT::i32, GOT); + ReplaceNode(Node, Res); + return; + } + case M68kISD::GLOBAL_BASE_REG: ReplaceNode(Node, getGlobalBaseReg()); return; + case M68kISD::TLSGetAddr: { + auto *Res = + CurDAG->getMachineNode(M68k::Pseudo_TLSGetAddr, DL, + Node->getValueType(0), Node->getOperand(0)); + ReplaceNode(Node, Res); + return; + } + case M68kISD::TLSReadTp: { + auto *Res = CurDAG->getMachineNode(M68k::Pseudo_TLSReadTp, DL, + Node->getValueType(0)); + ReplaceNode(Node, Res); + return; + } } SelectCode(Node); Index: llvm/lib/Target/M68k/M68kISelLowering.h =================================================================== --- llvm/lib/Target/M68k/M68kISelLowering.h +++ llvm/lib/Target/M68k/M68kISelLowering.h @@ -94,6 +94,10 @@ // segmented stacks. Check if the current stacklet has enough space, and // falls back to heap allocation if not. SEG_ALLOCA, + + /// Thread local storage support. + TLSGetAddr, + TLSReadTp, }; } // namespace M68kISD @@ -231,6 +235,7 @@ const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; /// LowerFormalArguments - transform physical registers into virtual /// registers and generate load operations for arguments places on the stack. 
Index: llvm/lib/Target/M68k/M68kISelLowering.cpp =================================================================== --- llvm/lib/Target/M68k/M68kISelLowering.cpp +++ llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -1389,7 +1389,124 @@ return LowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); - } + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + } +} + +static SDValue getTLSGetAddr(GlobalAddressSDNode *GA, SelectionDAG &DAG, + unsigned TargetFlags) { + // In general dynamic and local dynamic access models, to access a variable in + // Thread Local Storage (TLS), we typically use the function __tls_get_addr. + // This function has the following signature: + // struct tls_index { unsigned module; unsigned offset; }; + // void *__tls_get_addr(struct tls_index *index). + // Each variable in TLS has a separate tls_index associated with it, which is + // located in the Global Offset Table (GOT). + + // The following sequence is used to obtain the address of the tls_index in + // the GOT that is used to call __tls_get_addr. + + // 1. Get the address of the Global Offset Table. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); + + // 2. Obtain the address of the slot in GOT that corresponds to the global + // value by adding the address of GOT to the offset that is fixed by linker. + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), GA, GA->getValueType(0), GA->getOffset(), TargetFlags); + SDValue Arg = DAG.getNode(M68kISD::ADD, SDLoc(GA), MVT::i32, GOT, TGA); + + // 3. 
Just call `__tls_get_addr` + return DAG.getNode(M68kISD::TLSGetAddr, SDLoc(GA), GA->getValueType(0), Arg); +} + +static SDValue LowerTLSGeneralDynamic(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + return getTLSGetAddr(GA, DAG, M68kII::MO_TLSGD); +} + +static SDValue LowerTLSLocalDynamic(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + // The difference between General Dynamic and Local Dynamic is that Local + // Dynamic requires an additional step of adding an offset (fixed by linker as + // well) to obtain the address of a variable in Thread Local Storage. + // The goal of Local Dynamic is to reduce the number of calls to + // __tls_get_addr by obtaining the starting address of the Thread Local + // Storage and adding a separate offset to access the corresponding + // variable. + // For example, we have `a` and `b` at offsets 4 and 8 respectively: + // -------- <------ starting address of TLS + // | | + // | a | + // | b | + // | | + // -------- + // By calling `__tls_get_addr` once to get the starting address + // and then adding 4 and 8, we can get the addresses of a and b. + // However, we are still unable to achieve this goal since we generate a + // `__tls_get_addr` call for each Local Dynamic access. + // To achieve this, we need to analyze and eliminate redundant __tls_get_addr + // calls. + SDValue Addr = getTLSGetAddr(GA, DAG, M68kII::MO_TLSLDM); + SDValue TGA = + DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0), + GA->getOffset(), M68kII::MO_TLSLD); + return DAG.getNode(M68kISD::ADD, SDLoc(GA), MVT::i32, TGA, Addr); +} + +static SDValue LowerTLSInitialExec(GlobalAddressSDNode *GA, SelectionDAG &DAG) { + // The main difference between (initial|local) exec and the (global|local) + // dynamic is that the former can be accessed directly via the thread pointer. + + // In initial exec, the position of the variable is unknown until it is + // loaded.
So, we still need to allocate a slot in the GOT to allow the loader + // to fill in the correct offset for us. + + // This sequence is used to obtain the offset in the GOT + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); + SDValue Tp = DAG.getNode(M68kISD::TLSReadTp, SDLoc(GA), MVT::i32); + SDValue TGA = + DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0), + GA->getOffset(), M68kII::MO_TLSIE); + SDValue Addr = DAG.getNode(M68kISD::ADD, SDLoc(GA), MVT::i32, TGA, GOT); + SDValue Offset = + DAG.getLoad(MVT::i32, SDLoc(GA), DAG.getEntryNode(), Addr, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + + // Add the thread pointer to the offset to obtain the address of the variable. + return DAG.getNode(M68kISD::ADD, SDLoc(GA), MVT::i32, Offset, Tp); +} + +static SDValue LowerTLSLocalExec(GlobalAddressSDNode *GA, SelectionDAG &DAG) { + // The difference between the local exec and initial exec is that the + // offset to the thread pointer is fixed by linker, so we no longer need + // to allocate a slot in GOT.
+ SDValue Tp = DAG.getNode(M68kISD::TLSReadTp, SDLoc(GA), MVT::i32); + SDValue TGA = + DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0), + GA->getOffset(), M68kII::MO_TLSLE); + return DAG.getNode(M68kISD::ADD, SDLoc(GA), MVT::i32, TGA, Tp); +} + +SDValue M68kTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.isTargetELF()); + + auto *GA = cast(Op); + TLSModel::Model AccessModel = DAG.getTarget().getTLSModel(GA->getGlobal()); + + switch (AccessModel) { + case TLSModel::GeneralDynamic: + return LowerTLSGeneralDynamic(GA, DAG); + case TLSModel::LocalDynamic: + return LowerTLSLocalDynamic(GA, DAG); + case TLSModel::InitialExec: + return LowerTLSInitialExec(GA, DAG); + case TLSModel::LocalExec: + return LowerTLSLocalExec(GA, DAG); + } + + llvm_unreachable("should not be here"); } bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, @@ -3541,6 +3658,10 @@ return "M68kISD::WrapperPC"; case M68kISD::SEG_ALLOCA: return "M68kISD::SEG_ALLOCA"; + case M68kISD::TLSGetAddr: + return "M68kISD::TLSGetAddr"; + case M68kISD::TLSReadTp: + return "M68kISD::TLSReadTp"; default: return NULL; } Index: llvm/lib/Target/M68k/M68kInstrArithmetic.td =================================================================== --- llvm/lib/Target/M68k/M68kInstrArithmetic.td +++ llvm/lib/Target/M68k/M68kInstrArithmetic.td @@ -312,6 +312,12 @@ defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>; defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>; +// This pattern is used to enable the instruction selector to select ADD32ab +// for global values that are allocated in thread-local storage, i.e.: +// t8: i32 = M68kISD::ADD GLOBAL_OFFSET_TABLE, TargetGlobalTLSAddress:i32 +// ====> +// t8: i32,i8 = ADD32ab GLOBAL_OFFSET_TABLE, TargetGlobalTLSAddress:i32 +def : Pat<(MxAdd MxARD32:$src, MxCP_AL:$opd), (ADD32ab MxARD32:$src, MxAL32:$opd)>; let Uses = [CCR], Defs = [CCR] in { let Constraints = "$src = $dst" in { Index: 
llvm/lib/Target/M68k/M68kInstrCompiler.td =================================================================== --- llvm/lib/Target/M68k/M68kInstrCompiler.td +++ llvm/lib/Target/M68k/M68kInstrCompiler.td @@ -124,3 +124,14 @@ let usesCustomInserter = 1 in def SALLOCA : MxPseudo<(outs MxARD32:$dst), (ins MxARD32:$size), [(set iPTR:$dst, (MxSegAlloca iPTR:$size))]>; + +//===----------------------------------------------------------------------===// +// Thread local storage support +//===----------------------------------------------------------------------===// + +let Defs = [A0], isCall = 1 in { + +def Pseudo_TLSGetAddr: MxPseudo<(outs), (ins XR32:$src)>; +def Pseudo_TLSReadTp : MxPseudo<(outs), (ins)>; + +} Index: llvm/lib/Target/M68k/M68kInstrInfo.cpp =================================================================== --- llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -809,7 +809,12 @@ {MO_GOT, "m68k-got"}, {MO_GOTOFF, "m68k-gotoff"}, {MO_GOTPCREL, "m68k-gotpcrel"}, - {MO_PLT, "m68k-plt"}}; + {MO_PLT, "m68k-plt"}, + {MO_TLSGD, "m68k-tlsgd"}, + {MO_TLSLD, "m68k-tlsld"}, + {MO_TLSLDM, "m68k-tlsldm"}, + {MO_TLSIE, "m68k-tlsie"}, + {MO_TLSLE, "m68k-tlsle"}}; return ArrayRef(TargetFlags); } Index: llvm/lib/Target/M68k/M68kMCInstLower.cpp =================================================================== --- llvm/lib/Target/M68k/M68kMCInstLower.cpp +++ llvm/lib/Target/M68k/M68kMCInstLower.cpp @@ -96,6 +96,21 @@ case M68kII::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break; + case M68kII::MO_TLSGD: + RefKind = MCSymbolRefExpr::VK_TLSGD; + break; + case M68kII::MO_TLSLD: + RefKind = MCSymbolRefExpr::VK_TLSLD; + break; + case M68kII::MO_TLSLDM: + RefKind = MCSymbolRefExpr::VK_TLSLDM; + break; + case M68kII::MO_TLSIE: + RefKind = MCSymbolRefExpr::VK_GOTTPOFF; + break; + case M68kII::MO_TLSLE: + RefKind = MCSymbolRefExpr::VK_TPOFF; + break; } if (!Expr) { Index: llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h 
=================================================================== --- llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h +++ llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h @@ -131,6 +131,12 @@ /// /// name@PLT MO_PLT, + + MO_TLSGD, + MO_TLSLD, + MO_TLSLDM, + MO_TLSIE, + MO_TLSLE, }; // enum TOF /// Return true if the specified TargetFlag operand is a reference to a stub Index: llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp =================================================================== --- llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp +++ llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp @@ -67,9 +67,29 @@ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Kind = Fixup.getKind(); M68kRelType Type = getType(Kind, Modifier, IsPCRel); + +#define MAP_RELOC_TYPE(KIND, TYPE) \ + case MCSymbolRefExpr::KIND: \ + switch (Type) { \ + case RT_32: \ + return ELF::TYPE##32; \ + case RT_16: \ + return ELF::TYPE##16; \ + case RT_8: \ + return ELF::TYPE##8; \ + } \ + llvm_unreachable("Unrecognized size"); + switch (Modifier) { default: llvm_unreachable("Unimplemented"); + + MAP_RELOC_TYPE(VK_TLSGD, R_68K_TLS_GD) + MAP_RELOC_TYPE(VK_TLSLDM, R_68K_TLS_LDM) + MAP_RELOC_TYPE(VK_TLSLD, R_68K_TLS_LDO) + MAP_RELOC_TYPE(VK_GOTTPOFF, R_68K_TLS_IE) + MAP_RELOC_TYPE(VK_TPOFF, R_68K_TLS_LE) + case MCSymbolRefExpr::VK_None: switch (Type) { case RT_32: Index: llvm/test/CodeGen/M68k/TLS/tlsgd.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/M68k/TLS/tlsgd.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=m68k --relocation-model=pic -o - %s | FileCheck --check-prefix=CODEGEN %s +; RUN: llc -mtriple=m68k --relocation-model=pic -o - %s --filetype=obj \ +; RUN: | llvm-readobj -r - | FileCheck --check-prefix=RELOC %s + +@myvar = external thread_local global i32, align 4 + +define ptr @get_addr() { +; 
CODEGEN-LABEL: get_addr: +; CODEGEN: .cfi_startproc +; CODEGEN-NEXT: ; %bb.0: ; %entry +; CODEGEN-NEXT: lea (_GLOBAL_OFFSET_TABLE_@GOTPCREL,%pc), %a0 +; CODEGEN-NEXT: adda.l myvar@TLSGD, %a0 +; CODEGEN-NEXT: move.l %a0, -(%sp) +; CODEGEN-NEXT: bsr.l __tls_get_addr@PLT +; CODEGEN-NEXT: adda.l #4, %sp +; CODEGEN-NEXT: move.l %a0, %d0 +; CODEGEN-NEXT: rts + +; RELOC-LABEL: Section (3) .rela.text +; RELOC-NEXT: 0x2 R_68K_GOTPCREL16 _GLOBAL_OFFSET_TABLE_ 0x0 +; RELOC-NEXT: 0x6 R_68K_TLS_GD32 myvar 0x0 +; RELOC-NEXT: 0xE R_68K_PLT32 __tls_get_addr 0x0 +entry: + %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @myvar) + ret ptr %0 +} + +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) Index: llvm/test/CodeGen/M68k/TLS/tlsie.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/M68k/TLS/tlsie.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=m68k -o - %s | FileCheck --check-prefix=CODEGEN %s +; RUN: llc -mtriple=m68k -o - %s --filetype=obj \ +; RUN: | llvm-readobj -r - | FileCheck --check-prefix=RELOC %s + +@myvar = external thread_local global i32, align 4 + +define dso_local ptr @get_addr() { +; CODEGEN-LABEL: get_addr: +; CODEGEN: .cfi_startproc +; CODEGEN-NEXT: ; %bb.0: ; %entry +; CODEGEN-NEXT: lea (_GLOBAL_OFFSET_TABLE_@GOTPCREL,%pc), %a1 +; CODEGEN-NEXT: adda.l myvar@GOTTPOFF, %a1 +; CODEGEN-NEXT: jsr __m68k_read_tp +; CODEGEN-NEXT: move.l %a0, %d0 +; CODEGEN-NEXT: add.l (%a1), %d0 +; CODEGEN-NEXT: rts + +; RELOC-LABEL: Section (3) .rela.text +; RELOC-DAG: 0xC R_68K_32 __m68k_read_tp 0x0 +; RELOC-DAG: 0x2 R_68K_GOTPCREL16 _GLOBAL_OFFSET_TABLE_ 0x0 +; RELOC-DAG: 0x6 R_68K_TLS_IE32 myvar 0x0 +entry: + %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @myvar) + ret ptr %0 +} + +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) Index: llvm/test/CodeGen/M68k/TLS/tlsld.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/M68k/TLS/tlsld.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=m68k --relocation-model=pic -o - %s | FileCheck --check-prefix=CODEGEN %s +; RUN: llc -mtriple=m68k --relocation-model=pic -o - %s --filetype=obj \ +; RUN: | llvm-readobj -r - | FileCheck --check-prefix=RELOC %s + +@myvar = internal thread_local global i32 2, align 4 + +define dso_local ptr @get_addr() { +; CODEGEN-LABEL: get_addr: +; CODEGEN: .Lget_addr$local: +; CODEGEN-NEXT: .type .Lget_addr$local,@function +; CODEGEN-NEXT: .cfi_startproc +; CODEGEN-NEXT: ; %bb.0: ; %entry +; CODEGEN-NEXT: lea (_GLOBAL_OFFSET_TABLE_@GOTPCREL,%pc), %a0 +; CODEGEN-NEXT: adda.l myvar@TLSLDM, %a0 +; CODEGEN-NEXT: move.l %a0, -(%sp) +; CODEGEN-NEXT: bsr.l __tls_get_addr@PLT +; CODEGEN-NEXT: adda.l #4, %sp +; CODEGEN-NEXT: adda.l myvar@TLSLD, %a0 +; CODEGEN-NEXT: move.l %a0, %d0 +; CODEGEN-NEXT: rts + +; RELOC-LABEL: Section (3) .rela.text { +; RELOC-NEXT: 0x2 R_68K_GOTPCREL16 _GLOBAL_OFFSET_TABLE_ 0x0 +; RELOC-NEXT: 0x6 R_68K_TLS_LDM32 myvar 0x0 +; RELOC-NEXT: 0xE R_68K_PLT32 __tls_get_addr 0x0 +; RELOC-NEXT: 0x1A R_68K_TLS_LDO32 myvar 0x0 +entry: + %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @myvar) + ret ptr %0 +} + +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) Index: llvm/test/CodeGen/M68k/TLS/tlsle.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/M68k/TLS/tlsle.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=m68k -o - %s | FileCheck --check-prefix=CODEGEN %s +; RUN: llc -mtriple=m68k -o - %s --filetype=obj \ +; RUN: | llvm-readobj -r - | FileCheck --check-prefix=RELOC %s + +@myvar = internal thread_local global i32 2, align 4 + +define dso_local ptr @get_addr() { +; CODEGEN-LABEL: 
get_addr: +; CODEGEN: .cfi_startproc +; CODEGEN-NEXT: ; %bb.0: ; %entry +; CODEGEN-NEXT: jsr __m68k_read_tp +; CODEGEN-NEXT: adda.l myvar@TPOFF, %a0 +; CODEGEN-NEXT: move.l %a0, %d0 +; CODEGEN-NEXT: rts + +; RELOC-LABEL: Section (3) .rela.text +; RELOC-DAG: 0x2 R_68K_32 __m68k_read_tp 0x0 +; RELOC-DAG: 0x8 R_68K_TLS_LE32 myvar 0x0 + +entry: + %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @myvar) + ret ptr %0 +} + +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)