Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -93,7 +93,12 @@ MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4 + MO_TLS = 8 << 4, + + // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr + // call sequences. + MO_TLSLD = 9 << 4, + MO_TLSGD = 10 << 4 }; } // end namespace PPCII Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -681,35 +681,6 @@ .addExpr(SymGotTlsGD)); return; } - case PPC::GETtlsADDR: - // Transform: %X3 = GETtlsADDR %X3, - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) - case PPC::GETtlsADDR32: { - // Transform: %R3 = GETtlsADDR32 %R3, - // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT - - StringRef Name = "__tls_get_addr"; - MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; - - if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && - TM.getRelocationModel() == Reloc::PIC_) - Kind = MCSymbolRefExpr::VK_PLT; - const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, - OutContext); - EmitToStreamer(OutStreamer, - MCInstBuilder(Subtarget.isPPC64() ? - PPC::BL8_NOP_TLS : PPC::BL_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); - return; - } case PPC::ADDIStlsldHA: { // Transform: %Xd = ADDIStlsldHA %X2, // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha @@ -747,36 +718,6 @@ .addExpr(SymGotTlsLD)); return; } - case PPC::GETtlsldADDR: - // Transform: %X3 = GETtlsldADDR %X3, - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) - case PPC::GETtlsldADDR32: { - // Transform: %R3 = GETtlsldADDR32 %R3, - // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT - - StringRef Name = "__tls_get_addr"; - MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; - - if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && - TM.getRelocationModel() == Reloc::PIC_) - Kind = MCSymbolRefExpr::VK_PLT; - - const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, - OutContext); - EmitToStreamer(OutStreamer, - MCInstBuilder(Subtarget.isPPC64() ? - PPC::BL8_NOP_TLS : PPC::BL_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); - return; - } case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -99,6 +99,10 @@ /// SVR4 calls. CALL, CALL_NOP, + /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used + /// to access TLS variables. + CALL_TLS, CALL_NOP_TLS, + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -214,10 +218,6 @@ /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, - /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). - GET_TLS_ADDR, - /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. @@ -228,10 +228,6 @@ /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, - /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). - GET_TLSLD_ADDR, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed @@ -567,6 +563,8 @@ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + std::pair lowerTLSCall(SDValue Op, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -780,6 +780,8 @@ case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; + case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; + case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; @@ -809,10 +811,8 @@ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; - case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; - case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; @@ -1656,6 +1656,27 @@ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } +// Generate a call to __tls_get_addr for the given GOT entry Op. +std::pair +PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, + SelectionDAG &DAG) const { + + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Op; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, IntPtrTy, + DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), + std::move(Args), 0); + + return LowerCallTo(CLI); +} + SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1699,7 +1720,8 @@ } if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLSGD); SDValue GOTPtr; if (is64bit) { SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); @@ -1710,26 +1732,13 @@ } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, GOTPtr, TGA); - - // We need a chain node, and don't have one handy. The underlying - // call has no side effects, so using the function entry node - // suffices. - SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, - is64bit ? PPC::X3 : PPC::R3, GOTEntry); - SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, - is64bit ? MVT::i64 : MVT::i32); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLS_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); - return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT); + std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); + return CallResult.first; } if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLSLD); SDValue GOTPtr; if (is64bit) { SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); @@ -1740,23 +1749,11 @@ } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, GOTPtr, TGA); - - // We need a chain node, and don't have one handy. The underlying - // call has no side effects, so using the function entry node - // suffices. - SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, - is64bit ? PPC::X3 : PPC::R3, GOTEntry); - SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, - is64bit ? MVT::i64 : MVT::i32); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLSLD_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); + std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); + SDValue TLSAddr = CallResult.first; + SDValue Chain = CallResult.second; SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, ParmReg, TGA); + Chain, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } @@ -3716,6 +3713,23 @@ if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); + + // If this is a call to __tls_get_addr, find the symbol whose address + // is to be taken and add it to the list. This will be used to + // generate __tls_get_addr(@tlsgd) or __tls_get_addr(@tlsld). + // We find the symbol by walking the chain to the CopyFromReg, walking + // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and + // pulling the symbol from that node. + if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + if (!strcmp(S->getSymbol(), "__tls_get_addr")) { + assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); + SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); + SDValue TGTAddr = AddI->getOperand(1); + assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && + "Didn't find target global TLS address where we expected one"); + Ops.push_back(TGTAddr); + CallOpc = PPCISD::CALL_TLS; + } } // If this is a tail call add stack pointer delta. if (isTailCall) @@ -3868,7 +3882,9 @@ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; - } + } else if (CallOpc == PPCISD::CALL_TLS) + // For 64-bit SVR4, TLS calls are always non-local. + CallOpc = PPCISD::CALL_NOP_TLS; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -188,6 +188,9 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), + (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -876,11 +879,6 @@ [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - "#GETtlsADDR", - [(set i64:$rD, - (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -891,11 +889,6 @@ [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - "#GETtlsldADDR", - [(set i64:$rD, - (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -110,10 +110,8 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; -def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; -def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, [SDNPHasChain]>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; @@ -136,9 +134,15 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, @@ -2426,6 +2430,8 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; +def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), + (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -2481,18 +2487,10 @@ "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; -def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), - "#GETtlsADDR32", - [(set i32:$rD, - (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; -def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), - "#GETtlsldADDR32", - [(set i32:$rD, - (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, Index: lib/Target/PowerPC/PPCMCInstLower.cpp =================================================================== --- lib/Target/PowerPC/PPCMCInstLower.cpp +++ lib/Target/PowerPC/PPCMCInstLower.cpp @@ -137,6 +137,12 @@ case PPCII::MO_TLS: RefKind = MCSymbolRefExpr::VK_PPC_TLS; break; + case PPCII::MO_TLSGD: + RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; + break; + case PPCII::MO_TLSLD: + RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; + break; } if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) Index: test/CodeGen/PowerPC/tls-store2.ll =================================================================== --- test/CodeGen/PowerPC/tls-store2.ll +++ test/CodeGen/PowerPC/tls-store2.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Test back-to-back stores of TLS variables to ensure call sequences no +; longer overlap. + +@__once_callable = external thread_local global i8** +@__once_call = external thread_local global void ()* + +define i64 @call_once(i64 %flag, i8* %ptr) { +entry: + %var = alloca i8*, align 8 + store i8* %ptr, i8** %var, align 8 + store i8** %var, i8*** @__once_callable, align 8 + store void ()* @__once_call_impl, void ()** @__once_call, align 8 + ret i64 %flag +} + +; CHECK-LABEL: call_once: +; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha +; CHECK: addi 3, 3, __once_callable@got@tlsgd@l +; CHECK: bl __tls_get_addr(__once_callable@tlsgd) +; CHECK-NEXT: nop +; CHECK: std {{[0-9]+}}, 0(3) +; CHECK: addis 3, 2, __once_call@got@tlsgd@ha +; CHECK: addi 3, 3, __once_call@got@tlsgd@l +; CHECK: bl __tls_get_addr(__once_call@tlsgd) +; CHECK-NEXT: nop +; CHECK: std {{[0-9]+}}, 0(3) + +declare void @__once_call_impl()