Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -560,6 +560,7 @@
   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                  SelectionDAG &DAG) const;
+  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3981,6 +3981,72 @@
   return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
 }
 
+/// Lower a thread-local global address for Windows targets.
+///
+/// Builds the DAG that reads the TLS array pointer from the TEB (x18 + 0x58),
+/// indexes it with the 32-bit value loaded from _tls_index, and adds the
+/// variable's offset from the start of the .tls section in two ADDXri steps.
+SDValue
+AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
+
+  SDValue Chain = DAG.getEntryNode();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDLoc DL(Op);
+
+  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
+
+  // Load the ThreadLocalStoragePointer from the TEB.
+  // A pointer to the TLS array is located at offset 0x58 from the TEB.
+  SDValue TLSArray =
+      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
+  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
+  Chain = TLSArray.getValue(1);
+
+  // Load the TLS index from the C runtime.
+  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
+  // This produces the same effect as LOADgot, but using a generic i32 load
+  // instead of the i64 load that LOADgot performs.
+  SDValue TLSIndexHi =
+      DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
+  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
+      "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
+  SDValue TLSIndex =
+      DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
+  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
+  Chain = TLSIndex.getValue(1);
+
+  // The pointer to the thread's TLS data area is at the TLS index, scaled by
+  // 8, offset into the TLSArray.
+  SDValue Slot = DAG.getNode(ISD::SHL, DL, MVT::i32, TLSIndex,
+                             DAG.getConstant(3, DL, MVT::i32));
+  Slot = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, Slot);
+  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
+                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
+                            MachinePointerInfo());
+  Chain = TLS.getValue(1);
+
+  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GA->getGlobal();
+  SDValue TGAHi = DAG.getTargetGlobalAddress(
+      GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
+  SDValue TGALo = DAG.getTargetGlobalAddress(
+      GV, DL, PtrVT, 0,
+      AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+  // Add the offset from the start of the .tls section (section base).
+  SDValue Addr =
+      SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
+                                 DAG.getTargetConstant(0, DL, MVT::i32)),
+              0);
+  Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, TGALo,
+                                    DAG.getTargetConstant(0, DL, MVT::i32)),
+                 0);
+  return Addr;
+}
+
 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -3991,6 +4057,8 @@
     return LowerDarwinGlobalTLSAddress(Op, DAG);
   if (Subtarget->isTargetELF())
     return LowerELFGlobalTLSAddress(Op, DAG);
+  if (Subtarget->isTargetWindows())
+    return LowerWindowsGlobalTLSAddress(Op, DAG);
 
   llvm_unreachable("Unexpected platform trying to use TLS");
 }
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -1331,6 +1331,7 @@
 // page address of a constant pool entry, block address
 def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
 def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
+def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
 
 //===----------------------------------------------------------------------===//
 // Unconditional branch (register) instructions.
Index: lib/Target/AArch64/AArch64MCInstLower.cpp
===================================================================
--- lib/Target/AArch64/AArch64MCInstLower.cpp
+++ lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -173,11 +173,20 @@
 
 MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
                                                      MCSymbol *Sym) const {
-  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
-  const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
+  AArch64MCExpr::VariantKind RefKind = AArch64MCExpr::VK_NONE;
+  if (MO.getTargetFlags() & AArch64II::MO_TLS) {
+    if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGEOFF)
+      RefKind = AArch64MCExpr::VK_SECREL_LO12;
+    else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+             AArch64II::MO_HI12)
+      RefKind = AArch64MCExpr::VK_SECREL_HI12;
+  }
+  const MCExpr *Expr =
+      MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::createAdd(
         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+  Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
   return MCOperand::createExpr(Expr);
 }
 
Index: test/CodeGen/AArch64/win-tls.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/win-tls.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple aarch64-windows %s -o - | FileCheck %s
+
+@tlsVar = thread_local global i32 0
+
+define i32 @getVar() {
+  %1 = load i32, i32* @tlsVar
+  ret i32 %1
+}
+
+; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index
+; CHECK: ldr [[TLS_INDEX:w[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index]
+; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88]
+; FIXME: This lsl could ideally be folded into the uxtw-extended load below,
+; but that doesn't happen right now.
+; CHECK: lsl [[TLS_INDEX]], [[TLS_INDEX]], #3
+
+; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], [[TLS_INDEX]], uxtw]
+; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar
+; FIXME: This add+ldr could also be folded into a single ldr with a
+; :secrel_lo12: offset.
+; CHECK: add [[TLS]], [[TLS]], :secrel_lo12:tlsVar
+; CHECK: ldr w0, {{\[}}[[TLS]]{{\]}}