Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -853,6 +853,7 @@ return true; } + case AArch64::LOADgot32: case AArch64::LOADgot: { // Expand into ADRP + LDR. unsigned DstReg = MI.getOperand(0).getReg(); @@ -861,9 +862,14 @@ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) - .add(MI.getOperand(0)) - .addReg(DstReg); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == AArch64::LOADgot32 ? AArch64::LDRWui + : AArch64::LDRXui)); + if (Opcode == AArch64::LOADgot32) + MIB2 = MIB2.addReg(getWRegFromXReg(DstReg), RegState::Define); + else + MIB2 = MIB2.add(MI.getOperand(0)); + MIB2 = MIB2.addReg(DstReg); if (MO1.isGlobal()) { MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); @@ -924,6 +930,34 @@ return true; } + case AArch64::ADDsecrel: { + // Expand into ADD + ADD: dst = src + hi12, then dst = dst + lo12. 
+ const MachineOperand &GVOperand = MI.getOperand(2); + assert(GVOperand.isGlobal() && "ADDsecrel needs a global"); + const GlobalValue *GV = GVOperand.getGlobal(); + MachineOperand lo12 = MachineOperand::CreateGA( + GV, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF); + MachineOperand hi12 = + MachineOperand::CreateGA(GV, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(hi12) + .addImm(0); + + MachineInstrBuilder MIB2 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) + .add(MI.getOperand(0)) + .addReg(MI.getOperand(0).getReg()) + .add(lo12) + .addImm(0); + + transferImpOps(MI, MIB1, MIB2); + MI.eraseFromParent(); + return true; + } + case AArch64::MOVi32imm: return expandMOVImm(MBB, MBBI, 32); case AArch64::MOVi64imm: Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -38,6 +38,7 @@ ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. LOADgot, // Load from automatically generated descriptor (e.g. Global // Offset Table, TLS record). + LOADgot32,// Load 32 bits from automatically generated descriptor RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand. BRCOND, // Conditional branch instruction; "b.cond". CSEL, @@ -45,6 +46,7 @@ CSINV, // Conditional select invert. CSNEG, // Conditional select negate. CSINC, // Conditional select increment. + ADDsecrel, // Add the section relative offset of a global variable // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on // ELF. 
@@ -555,6 +557,7 @@ SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, SelectionDAG &DAG) const; + SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1071,7 +1071,9 @@ case AArch64ISD::CALL: return "AArch64ISD::CALL"; case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; + case AArch64ISD::ADDsecrel: return "AArch64ISD::ADDsecrel"; case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; + case AArch64ISD::LOADgot32: return "AArch64ISD::LOADgot32"; case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; @@ -3981,6 +3983,48 @@ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); } +SDValue +AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); + + SDValue Chain = DAG.getEntryNode(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(Op); + + SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64); + + // Load the ThreadLocalStoragePointer from the TEB + // A pointer to the TLS array is located at offset 0x58 from the TEB. 
+ SDValue TLSArray = + DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL)); + TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); + Chain = TLSArray.getValue(1); + + // Load the TLS index from the C runtime + SDValue TLSIndex = DAG.getTargetExternalSymbol("_tls_index", PtrVT, 0); + TLSIndex = DAG.getNode(AArch64ISD::LOADgot32, DL, PtrVT, TLSIndex); + // LOADgot32 only loads 32 bits, but pretends to return an i64 to make + // tablegen not fail. Truncate it to i32 as it should be returned. + TLSIndex = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, TLSIndex); + + // The pointer to the thread's TLS data area is at the TLS Index scaled by 8 + // offset into the TLSArray. + SDValue Slot = DAG.getNode(ISD::SHL, DL, MVT::i32, TLSIndex, + DAG.getConstant(3, DL, MVT::i32)); + Slot = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, Slot); + SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, + DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), + MachinePointerInfo()); + Chain = TLS.getValue(1); + + const auto *GA = cast<GlobalAddressSDNode>(Op); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), 0); + // Add the offset from the start of the .tls section (section base). + return DAG.getNode(AArch64ISD::ADDsecrel, DL, PtrVT, TLS, TGA); +} + SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); @@ -3991,6 +4035,8 @@ return LowerDarwinGlobalTLSAddress(Op, DAG); if (Subtarget->isTargetELF()) return LowerELFGlobalTLSAddress(Op, DAG); + if (Subtarget->isTargetWindows()) + return LowerWindowsGlobalTLSAddress(Op, DAG); llvm_unreachable("Unexpected platform trying to use TLS"); } Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -165,7 +165,9 @@ // Node definitions. 
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64addsecrel : SDNode<"AArch64ISD::ADDsecrel", SDTIntBinOp, []>; def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64LOADgot32 : SDNode<"AArch64ISD::LOADgot32", SDTIntUnaryOp>; def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>, @@ -363,6 +365,9 @@ def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, Sched<[WriteLDAdr]>; +def LOADgot32 : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), + [(set GPR64:$dst, (AArch64LOADgot32 tglobaladdr:$addr))]>, + Sched<[WriteLDAdr]>; // The MOVaddr instruction should match only when the add is not folded // into a load or store address. @@ -397,6 +402,10 @@ texternalsym:$low))]>, Sched<[WriteAdrAdr]>; +def ADDsecrel + : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$addr), + [(set GPR64:$dst, (AArch64addsecrel GPR64:$src, tglobaltlsaddr:$addr))]>, + Sched<[WriteAdrAdr]>; } // isReMaterializable, isCodeGenOnly def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), @@ -408,6 +417,15 @@ def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>; +def : Pat<(AArch64LOADgot32 tglobaltlsaddr:$addr), + (LOADgot32 tglobaltlsaddr:$addr)>; + +def : Pat<(AArch64LOADgot32 texternalsym:$addr), + (LOADgot32 texternalsym:$addr)>; + +def : Pat<(AArch64LOADgot32 tconstpool:$addr), + (LOADgot32 tconstpool:$addr)>; + //===----------------------------------------------------------------------===// // System instructions. 
//===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64MCInstLower.cpp =================================================================== --- lib/Target/AArch64/AArch64MCInstLower.cpp +++ lib/Target/AArch64/AArch64MCInstLower.cpp @@ -173,11 +173,21 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, MCSymbol *Sym) const { - MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); + AArch64MCExpr::VariantKind RefKind = AArch64MCExpr::VK_NONE; + if (MO.getTargetFlags() & AArch64II::MO_TLS) { + // MO_FRAGMENT is an enumerated field, not a bit set: compare, don't test. + const unsigned Fragment = MO.getTargetFlags() & AArch64II::MO_FRAGMENT; + if (Fragment == AArch64II::MO_PAGEOFF) + RefKind = AArch64MCExpr::VK_SECREL_LO12; + else if (Fragment == AArch64II::MO_PAGE) + RefKind = AArch64MCExpr::VK_SECREL_HI12; + } + const MCExpr *Expr = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::createAdd( Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + Expr = AArch64MCExpr::create(Expr, RefKind, Ctx); return MCOperand::createExpr(Expr); } Index: test/CodeGen/AArch64/win-tls.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/win-tls.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple aarch64-windows %s -o - | FileCheck %s + +@tlsVar = thread_local global i32 0 + +define i32 @getVar() { + %1 = load i32, i32* @tlsVar + ret i32 %1 +} + +; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] +; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index +; CHECK: ldr [[TLS_INDEX:w[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; This lsl could ideally be folded into the uxtw below, but that doesn't +; happen right now. 
+; CHECK: lsl [[TLS_INDEX]], [[TLS_INDEX]], #3 + +; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], [[TLS_INDEX]], uxtw] +; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar +; This add+ldr could also be folded into a single ldr with a :secrel_lo12: +; offset. +; CHECK: add [[TLS]], [[TLS]], :secrel_lo12:tlsVar +; CHECK: ldr w0, {{\[}}[[TLS]]{{\]}}