Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -40,6 +40,7 @@ FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCTLSDynamicCallPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); @@ -93,12 +94,7 @@ MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4, - - // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr - // call sequences. - MO_TLSLD = 9 << 4, - MO_TLSGD = 10 << 4 + MO_TLS = 8 << 4 }; } // end namespace PPCII Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -807,6 +807,35 @@ .addExpr(SymGotTlsGD)); return; } + case PPC::GETtlsADDR: + // Transform: %X3 = GETtlsADDR %X3, + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) + case PPC::GETtlsADDR32: { + // Transform: %R3 = GettlsADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? 
+ PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } case PPC::ADDIStlsldHA: { // Transform: %Xd = ADDIStlsldHA %X2, // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha @@ -844,6 +873,36 @@ .addExpr(SymGotTlsLD)); return; } + case PPC::GETtlsldADDR: + // Transform: %X3 = GETtlsldADDR %X3, + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) + case PPC::GETtlsldADDR32: { + // Transform: %R3 = GETtlsldADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; + + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? + PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -355,6 +355,20 @@ return FuncInfo->hasNonRISpills(); } +/// MustSaveLR - Return true if this function requires that we save the LR +/// register onto the stack in the prolog and restore it in the epilog of the +/// function. 
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { + const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); + + // We need a save/restore of LR if there is any def of LR (which is + // defined by calls, including the PIC setup sequence), or if there is + // some use of the LR stack slot (e.g. for builtin_return_address). + // (LR comes in 32 and 64 bit versions.) + MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); + return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, @@ -381,6 +395,7 @@ // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); + unsigned LR = RegInfo->getRARegister(); if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. @@ -388,6 +403,7 @@ FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) @@ -1108,20 +1124,6 @@ } } -/// MustSaveLR - Return true if this function requires that we save the LR -/// register onto the stack in the prolog and restore it in the epilog of the -/// function. -static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { - const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); - - // We need a save/restore of LR if there is any def of LR (which is - // defined by calls, including the PIC setup sequence), or if there is - // some use of the LR stack slot (e.g. for builtin_return_address). - // (LR comes in 32 and 64 bit versions.) 
- MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); - return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); -} - void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -101,10 +101,6 @@ /// SVR4 calls. CALL, CALL_NOP, - /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used - /// to access TLS variables. - CALL_TLS, CALL_NOP_TLS, - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -228,6 +224,10 @@ /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsgd). + GET_TLS_ADDR, + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. @@ -238,11 +238,13 @@ /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the - /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed - /// to tie this in place following a copy to %X3 from the result - /// of a GET_TLSLD_ADDR. + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsld). + GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds X3 to + /// sym\@dtprel\@ha. 
ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS @@ -635,8 +637,6 @@ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - std::pair lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -791,8 +791,6 @@ case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; - case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -826,8 +824,10 @@ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; @@ -1670,27 +1670,6 @@ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } -// Generate a call to __tls_get_addr for the given GOT entry Op. 
-std::pair -PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const { - - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, IntPtrTy, - DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), - std::move(Args), 0); - - return LowerCallTo(CLI); -} - SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1736,8 +1715,7 @@ } if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSGD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); @@ -1749,15 +1727,13 @@ else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTPtr, TGA); - std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); - return CallResult.first; + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, + PtrVT, GOTPtr, TGA); + return DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, GOTEntry, TGA); } if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSLD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); @@ -1771,11 +1747,10 @@ } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, GOTPtr, TGA); - std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); - SDValue TLSAddr = CallResult.first; - SDValue Chain = CallResult.second; - SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, TLSAddr, TGA); + SDValue TLSAddr = 
DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, GOTEntry, TGA); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, + PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } @@ -3794,23 +3769,6 @@ if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); - - // If this is a call to __tls_get_addr, find the symbol whose address - // is to be taken and add it to the list. This will be used to - // generate __tls_get_addr(@tlsgd) or __tls_get_addr(@tlsld). - // We find the symbol by walking the chain to the CopyFromReg, walking - // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and - // pulling the symbol from that node. - if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - if (!strcmp(S->getSymbol(), "__tls_get_addr")) { - assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); - SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); - SDValue TGTAddr = AddI->getOperand(1); - assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && - "Didn't find target global TLS address where we expected one"); - Ops.push_back(TGTAddr); - CallOpc = PPCISD::CALL_TLS; - } } // If this is a tail call add stack pointer delta. if (isTailCall) @@ -3972,12 +3930,9 @@ Ops.insert(std::next(Ops.begin()), AddTOC); } else if ((CallOpc == PPCISD::CALL) && (!isLocalCall(Callee) || - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; - } else if (CallOpc == PPCISD::CALL_TLS) - // For 64-bit SVR4, TLS calls are always non-local. 
- CallOpc = PPCISD::CALL_NOP_TLS; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -202,9 +202,6 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; -def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -904,6 +901,13 @@ [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +let Defs = [LR8] in { +def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +} def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -914,6 +918,13 @@ [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +let Defs = [LR8] in { +def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +} def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2277,3 +2277,88 @@ char PPCEarlyReturn::ID = 0; FunctionPass* llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-tls-dynamic-call" + +namespace llvm { + void initializePPCTLSDynamicCallPass(PassRegistry&); +} + +namespace { + // PPCTLSDynamicCall pass - Add copies to and from GPR3 around + // GETtls[ld]ADDR machine instructions. 
+  struct PPCTLSDynamicCall : public MachineFunctionPass {
+    static char ID;
+    PPCTLSDynamicCall() : MachineFunctionPass(ID) {
+      initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Target hooks cached by runOnMachineFunction for the current function.
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    // Bracket each GETtls[ld]ADDR[32] in MBB with a COPY of its input into
+    // GPR3 and a COPY of GPR3 into its output.  Returns true if MBB changed.
+    bool processBlock(MachineBasicBlock &MBB) {
+      bool Changed = false;
+      const unsigned GPR3 =
+        TM->getSubtargetImpl()->isPPC64() ? PPC::X3 : PPC::R3;
+
+      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+           I != IE;) {
+        MachineInstr *MI = I;
+        // Advance before editing: I then marks where the copy-out goes and
+        // is never incremented past the end of the block.
+        ++I;
+        // The 32-bit pseudos need the same GPR3 copies as the 64-bit ones.
+        const unsigned Opc = MI->getOpcode();
+        if (Opc != PPC::GETtlsADDR && Opc != PPC::GETtlsldADDR &&
+            Opc != PPC::GETtlsADDR32 && Opc != PPC::GETtlsldADDR32)
+          continue;
+
+        DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+
+        unsigned OutReg = MI->getOperand(0).getReg();
+        unsigned InReg = MI->getOperand(1).getReg();
+        DebugLoc DL = MI->getDebugLoc();
+
+        BuildMI(MBB, MI, DL, TII->get(TargetOpcode::COPY), GPR3)
+          .addReg(InReg);
+        MI->getOperand(0).setReg(GPR3);
+        MI->getOperand(1).setReg(GPR3);
+        BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+          .addReg(GPR3);
+        Changed = true;
+      }
+      return Changed;
+    }
+
+public:
+    // Run the fixup over every block of MF; returns true if any changed.
+    bool runOnMachineFunction(MachineFunction &MF) override {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getSubtargetImpl()->getInstrInfo();
+      bool Changed = false;
+      for (MachineBasicBlock &MBB : MF)
+        Changed |= processBlock(MBB);
+      return Changed;
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+                "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+                "PowerPC TLS Dynamic Call Fixup", false, false)
+
+//char &llvm::PPCTLSDynamicCallID = PPCTLSDynamicCall::ID;
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
Index: 
lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -110,10 +110,11 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; -def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, - [SDNPHasChain]>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; @@ -136,15 +137,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -2459,9 +2454,6 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; -def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -2516,10 +2508,23 @@ "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, 
tglobaltlsaddr:$disp))]>; +let Defs = [LR] in { +def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsADDR32", + [(set i32:$rD, + (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; +} def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +let Defs = [LR] in { +def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsldADDR32", + [(set i32:$rD, + (PPCgetTlsldAddr i32:$reg, + tglobaltlsaddr:$sym))]>; +} def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, Index: lib/Target/PowerPC/PPCMCInstLower.cpp =================================================================== --- lib/Target/PowerPC/PPCMCInstLower.cpp +++ lib/Target/PowerPC/PPCMCInstLower.cpp @@ -137,12 +137,6 @@ case PPCII::MO_TLS: RefKind = MCSymbolRefExpr::VK_PPC_TLS; break; - case PPCII::MO_TLSGD: - RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; - break; - case PPCII::MO_TLSLD: - RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; - break; } if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -262,6 +262,7 @@ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, &PPCVSXFMAMutateID); + addPass(createPPCTLSDynamicCallPass()); } void PPCPassConfig::addPreSched2() { Index: test/CodeGen/PowerPC/retaddr2.ll =================================================================== --- test/CodeGen/PowerPC/retaddr2.ll +++ test/CodeGen/PowerPC/retaddr2.ll @@ -13,7 +13,7 @@ ; CHECK: mflr 0 ; CHECK: std 0, 16(1) ; FIXME: These next two lines don't both need to load the same value. 
-; CHECK-DAG: ld 3, 16(1) +; CHECK-DAG: ld 3, 64(1) ; CHECK-DAG: ld 0, 16(1) ; CHECK: mtlr 0 ; CHECK: blr Index: test/CodeGen/PowerPC/tls-cse.ll =================================================================== --- test/CodeGen/PowerPC/tls-cse.ll +++ test/CodeGen/PowerPC/tls-cse.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1 + +; This test was derived from LLVM's own +; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an +; opportunity for CSE of calls to __tls_get_addr(). + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* } + +@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8 +@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8 +@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1 +@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1 +@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1 + +declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr +declare void @__cxa_pure_virtual() +declare void @__assert_fail(i8*, i8*, i32 zeroext, 
i8*) +declare void @_ZdlPv(i8*) + +define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 { +entry: + %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8 + %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this + br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0)) + unreachable + +_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry + %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1 + %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64* + %3 = load i64* %2, align 8 + store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8 + %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8* + tail call void @_ZdlPv(i8* %4) + ret void +} + +; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev: +; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha +; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l +; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld) +; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha +; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) +; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) Index: test/CodeGen/PowerPC/tls-store2.ll 
=================================================================== --- test/CodeGen/PowerPC/tls-store2.ll +++ test/CodeGen/PowerPC/tls-store2.ll @@ -19,13 +19,11 @@ } ; CHECK-LABEL: call_once: -; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha -; CHECK: addi 3, 3, __once_callable@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_callable@tlsgd) ; CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3) -; CHECK: addis 3, 2, __once_call@got@tlsgd@ha -; CHECK: addi 3, 3, __once_call@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3)