Index: lib/Target/PowerPC/CMakeLists.txt =================================================================== --- lib/Target/PowerPC/CMakeLists.txt +++ lib/Target/PowerPC/CMakeLists.txt @@ -32,6 +32,7 @@ PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp + PPCTLSDynamicCall.cpp PPCVSXCopy.cpp PPCVSXFMAMutate.cpp ) Index: lib/Target/PowerPC/PPC.h =================================================================== --- lib/Target/PowerPC/PPC.h +++ lib/Target/PowerPC/PPC.h @@ -40,6 +40,7 @@ FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCTLSDynamicCallPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); @@ -90,12 +91,7 @@ MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4, - - // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr - // call sequences. - MO_TLSLD = 9 << 4, - MO_TLSGD = 10 << 4 + MO_TLS = 8 << 4 }; } // end namespace PPCII Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -101,6 +101,7 @@ const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); + void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -404,6 +405,39 @@ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); } +/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a +/// call to __tls_get_addr to the current output stream. +void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, + MCSymbolRefExpr::VariantKind VK) { + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + assert(MI->getOperand(0).isReg() && + ((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) || + (!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) && + "GETtls[ld]ADDR[32] must define GPR3"); + assert(MI->getOperand(1).isReg() && + ((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) || + (!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) && + "GETtls[ld]ADDR[32] must read GPR3"); + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = getSymbol(GValue); + const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? + PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); +} + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// @@ -807,6 +841,15 @@ .addExpr(SymGotTlsGD)); return; } + case PPC::GETtlsADDR: + // Transform: %X3 = GETtlsADDR %X3, + // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) + case PPC::GETtlsADDR32: { + // Transform: %R3 = GETtlsADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT + EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD); + return; + } case PPC::ADDIStlsldHA: { // Transform: %Xd = ADDIStlsldHA %X2, // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha @@ -844,6 +887,15 @@ .addExpr(SymGotTlsLD)); return; } + case PPC::GETtlsldADDR: + // Transform: %X3 = GETtlsldADDR %X3, + // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld) + case PPC::GETtlsldADDR32: { + // Transform: %R3 = GETtlsldADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT + EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD); + return; + } case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -355,6 +355,20 @@ return FuncInfo->hasNonRISpills(); } +/// MustSaveLR - Return true if this function requires that we save the LR +/// register onto the stack in the prolog and restore it in the epilog of the +/// function. +static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { + const PPCFunctionInfo *MFI = MF.getInfo(); + + // We need a save/restore of LR if there is any def of LR (which is + // defined by calls, including the PIC setup sequence), or if there is + // some use of the LR stack slot (e.g. for builtin_return_address). + // (LR comes in 32 and 64 bit versions.) + MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); + return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, @@ -381,6 +395,7 @@ // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); + unsigned LR = RegInfo->getRARegister(); if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. @@ -388,6 +403,7 @@ FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) @@ -1108,20 +1124,6 @@ } } -/// MustSaveLR - Return true if this function requires that we save the LR -/// register onto the stack in the prolog and restore it in the epilog of the -/// function. -static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { - const PPCFunctionInfo *MFI = MF.getInfo(); - - // We need a save/restore of LR if there is any def of LR (which is - // defined by calls, including the PIC setup sequence), or if there is - // some use of the LR stack slot (e.g. for builtin_return_address). - // (LR comes in 32 and 64 bit versions.) - MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); - return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); -} - void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -101,10 +101,6 @@ /// SVR4 calls. CALL, CALL_NOP, - /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used - /// to access TLS variables. - CALL_TLS, CALL_NOP_TLS, - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -223,26 +219,46 @@ /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, - /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l. + /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by + /// ADDIS_TLSGD_L_ADDR until after register assignment. ADDI_TLSGD_L, + /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by + /// ADDIS_TLSGD_L_ADDR until after register assignment. + GET_TLS_ADDR, + + /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following + /// register assignment. + ADDI_TLSGD_L_ADDR, + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, - /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l. + /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by + /// ADDIS_TLSLD_L_ADDR until after register assignment. ADDI_TLSLD_L, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the - /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed - /// to tie this in place following a copy to %X3 from the result - /// of a GET_TLSLD_ADDR. + /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by + /// ADDIS_TLSLD_L_ADDR until after register assignment. + GET_TLSLD_ADDR, + + /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion + /// following register assignment. + ADDI_TLSLD_L_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds X3 to + /// sym\@dtprel\@ha. ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS @@ -635,8 +651,6 @@ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - std::pair lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -806,8 +806,6 @@ case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; - case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -841,8 +839,12 @@ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; + case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; @@ -1701,27 +1703,6 @@ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } -// Generate a call to __tls_get_addr for the given GOT entry Op. -std::pair -PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const { - - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, IntPtrTy, - DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), - std::move(Args), 0); - - return LowerCallTo(CLI); -} - SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1768,8 +1749,7 @@ } if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSGD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); @@ -1782,15 +1762,12 @@ else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTPtr, TGA); - std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); - return CallResult.first; + return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, + GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSLD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); @@ -1803,13 +1780,10 @@ else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, - GOTPtr, TGA); - std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG); - SDValue TLSAddr = CallResult.first; - SDValue Chain = CallResult.second; - SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, TLSAddr, TGA); + SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, + PtrVT, GOTPtr, TGA, TGA); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, + PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } @@ -3833,23 +3807,6 @@ if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); - - // If this is a call to __tls_get_addr, find the symbol whose address - // is to be taken and add it to the list. This will be used to - // generate __tls_get_addr(@tlsgd) or __tls_get_addr(@tlsld). - // We find the symbol by walking the chain to the CopyFromReg, walking - // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and - // pulling the symbol from that node. - if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - if (!strcmp(S->getSymbol(), "__tls_get_addr")) { - assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); - SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); - SDValue TGTAddr = AddI->getOperand(1); - assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && - "Didn't find target global TLS address where we expected one"); - Ops.push_back(TGTAddr); - CallOpc = PPCISD::CALL_TLS; - } } // If this is a tail call add stack pointer delta. if (isTailCall) @@ -4012,12 +3969,9 @@ Ops.insert(std::next(Ops.begin()), AddTOC); } else if ((CallOpc == PPCISD::CALL) && (!isLocalCall(Callee) || - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; - } else if (CallOpc == PPCISD::CALL_TLS) - // For 64-bit SVR4, TLS calls are always non-local. - CallOpc = PPCISD::CALL_NOP_TLS; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -202,9 +202,6 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; -def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -904,6 +901,28 @@ [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +// LR8 is a true define, while the rest of the Defs are clobbers. X3 is +// explicitly defined when this op is created, so not mentioned here. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in +def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8 +// are true defines while the rest of the Defs are clobbers. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] + in +def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD), + (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), + "#ADDItlsgdLADDR", + [(set i64:$rD, + (PPCaddiTlsgdLAddr i64:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>, + isPPC64; def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -914,6 +933,28 @@ [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +// LR8 is a true define, while the rest of the Defs are clobbers. X3 is +// explicitly defined when this op is created, so not mentioned here. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in +def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +// Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8 +// are true defines, while the rest of the Defs are clobbers. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] + in +def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD), + (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), + "#ADDItlsldLADDR", + [(set i64:$rD, + (PPCaddiTlsldLAddr i64:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>, + isPPC64; def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -110,10 +110,19 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; -def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, - [SDNPHasChain]>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; @@ -136,15 +145,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -2459,9 +2462,6 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; -def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -2516,10 +2516,49 @@ "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; +// LR is a true define, while the rest of the Defs are clobbers. R3 is +// explicitly defined when this op is created, so not mentioned here. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in +def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsADDR32", + [(set i32:$rD, + (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; +// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR +// are true defines while the rest of the Defs are clobbers. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in +def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD), + (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), + "#ADDItlsgdLADDR32", + [(set i32:$rD, + (PPCaddiTlsgdLAddr i32:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>; def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +// LR is a true define, while the rest of the Defs are clobbers. R3 is +// explicitly defined when this op is created, so not mentioned here. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in +def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsldADDR32", + [(set i32:$rD, + (PPCgetTlsldAddr i32:$reg, + tglobaltlsaddr:$sym))]>; +// Combined op for ADDItlsldL32 and GETtlsADDR32, late expanded. R3 and LR +// are true defines while the rest of the Defs are clobbers. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in +def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD), + (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), + "#ADDItlsldLADDR32", + [(set i32:$rD, + (PPCaddiTlsldLAddr i32:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>; def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, Index: lib/Target/PowerPC/PPCMCInstLower.cpp =================================================================== --- lib/Target/PowerPC/PPCMCInstLower.cpp +++ lib/Target/PowerPC/PPCMCInstLower.cpp @@ -137,12 +137,6 @@ case PPCII::MO_TLS: RefKind = MCSymbolRefExpr::VK_PPC_TLS; break; - case PPCII::MO_TLSGD: - RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; - break; - case PPCII::MO_TLSLD: - RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; - break; } if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) Index: lib/Target/PowerPC/PPCTLSDynamicCall.cpp =================================================================== --- lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -0,0 +1,170 @@ +//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into +// separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of +// which define GPR3. A copy is added from GPR3 to the target virtual +// register of the original instruction. The GETtlsADDR[32] is really +// a call instruction, so its target register is constrained to be GPR3. +// This is not true of ADDItls[gd]L[32], but there is a legacy linker +// optimization bug that requires the target register of the addi of +// a local- or general-dynamic TLS access sequence to be GPR3. +// +// This is done in a late pass so that TLS variable accesses can be +// fully commoned by MachineCSE. +// +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-tls-dynamic-call" + +namespace llvm { + void initializePPCTLSDynamicCallPass(PassRegistry&); +} + +namespace { + struct PPCTLSDynamicCall : public MachineFunctionPass { + static char ID; + PPCTLSDynamicCall() : MachineFunctionPass(ID) { + initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + LiveIntervals *LIS; + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + bool Is64Bit = TM->getSubtargetImpl()->isPPC64(); + + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) { + MachineInstr *MI = I; + + if (MI->getOpcode() != PPC::ADDItlsgdLADDR && + MI->getOpcode() != PPC::ADDItlsldLADDR && + MI->getOpcode() != PPC::ADDItlsgdLADDR32 && + MI->getOpcode() != PPC::ADDItlsldLADDR32) + continue; + + DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;); + + unsigned OutReg = MI->getOperand(0).getReg(); + unsigned InReg = MI->getOperand(1).getReg(); + DebugLoc DL = MI->getDebugLoc(); + unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3; + unsigned Opc1, Opc2; + SmallVector OrigRegs; + OrigRegs.push_back(OutReg); + OrigRegs.push_back(InReg); + OrigRegs.push_back(GPR3); + + switch (MI->getOpcode()) { + default: + llvm_unreachable("Opcode inconsistency error"); + case PPC::ADDItlsgdLADDR: + Opc1 = PPC::ADDItlsgdL; + Opc2 = PPC::GETtlsADDR; + break; + case PPC::ADDItlsldLADDR: + Opc1 = PPC::ADDItlsldL; + Opc2 = PPC::GETtlsldADDR; + break; + case PPC::ADDItlsgdLADDR32: + Opc1 = PPC::ADDItlsgdL32; + Opc2 = PPC::GETtlsADDR32; + break; + case PPC::ADDItlsldLADDR32: + Opc1 = PPC::ADDItlsldL32; + Opc2 = PPC::GETtlsldADDR32; + break; + } + + // Expand into two ops built prior to the existing instruction. + MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) + .addReg(InReg); + Addi->addOperand(MI->getOperand(2)); + + // The ADDItls* instruction is the first instruction in the + // repair range. + MachineBasicBlock::iterator First = I; + --First; + + MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3) + .addReg(GPR3)); + Call->addOperand(MI->getOperand(3)); + + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) + .addReg(GPR3); + + // The COPY is the last instruction in the repair range. + MachineBasicBlock::iterator Last = I; + --Last; + + // Move past the original instruction and remove it. + ++I; + MI->removeFromParent(); + + // Repair the live intervals. + LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs); + Changed = true; + } + + return Changed; + } + +public: + bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast(&MF.getTarget()); + TII = TM->getSubtargetImpl()->getInstrInfo(); + LIS = &getAnalysis(); + + bool Changed = false; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, + "PowerPC TLS Dynamic Call Fixup", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE, + "PowerPC TLS Dynamic Call Fixup", false, false) + +char PPCTLSDynamicCall::ID = 0; +FunctionPass* +llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); } Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -266,6 +266,8 @@ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, &PPCVSXFMAMutateID); + if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_) + addPass(createPPCTLSDynamicCallPass()); } void PPCPassConfig::addPreSched2() { Index: test/CodeGen/PowerPC/retaddr2.ll =================================================================== --- test/CodeGen/PowerPC/retaddr2.ll +++ test/CodeGen/PowerPC/retaddr2.ll @@ -12,8 +12,7 @@ ; CHECK-LABEL: @test1 ; CHECK: mflr 0 ; CHECK: std 0, 16(1) -; FIXME: These next two lines don't both need to load the same value. -; CHECK-DAG: ld 3, 16(1) +; CHECK-DAG: ld 3, 64(1) ; CHECK-DAG: ld 0, 16(1) ; CHECK: mtlr 0 ; CHECK: blr Index: test/CodeGen/PowerPC/tls-cse.ll =================================================================== --- test/CodeGen/PowerPC/tls-cse.ll +++ test/CodeGen/PowerPC/tls-cse.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1 + +; This test was derived from LLVM's own +; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an +; opportunity for CSE of calls to __tls_get_addr(). + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* } + +@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8 +@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8 +@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1 +@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1 +@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1 + +declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr +declare void @__cxa_pure_virtual() +declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) +declare void @_ZdlPv(i8*) + +define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 { +entry: + %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8 + %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this + br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0)) + unreachable + +_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry + %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1 + %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64* + %3 = load i64* %2, align 8 + store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8 + %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8* + tail call void @_ZdlPv(i8* %4) + ret void +} + +; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev: +; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha +; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l +; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld) +; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha +; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) +; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) Index: test/CodeGen/PowerPC/tls-store2.ll =================================================================== --- test/CodeGen/PowerPC/tls-store2.ll +++ test/CodeGen/PowerPC/tls-store2.ll @@ -19,13 +19,11 @@ } ; CHECK-LABEL: call_once: -; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha -; CHECK: addi 3, 3, __once_callable@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_callable@tlsgd) ; CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3) -; CHECK: addis 3, 2, __once_call@got@tlsgd@ha -; CHECK: addi 3, 3, __once_call@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3)