Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -219,29 +219,43 @@ /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, - /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l. + /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by + /// ADDI_TLSGD_L_ADDR until after register assignment. ADDI_TLSGD_L, - /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). + /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by + /// ADDI_TLSGD_L_ADDR until after register assignment. GET_TLS_ADDR, + /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following + /// register assignment. + ADDI_TLSGD_L_ADDR, + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, - /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l. + /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by + /// ADDI_TLSLD_L_ADDR until after register assignment. ADDI_TLSLD_L, - /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). + /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsld). 
Hidden by + /// ADDI_TLSLD_L_ADDR until after register assignment. GET_TLSLD_ADDR, + /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion + /// following register assignment. + ADDI_TLSLD_L_ADDR, + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds X3 to /// sym\@dtprel\@ha. Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1760,9 +1760,8 @@ else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, - PtrVT, GOTPtr, TGA); - return DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, GOTEntry, TGA); + return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, + GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { @@ -1779,10 +1778,8 @@ else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, - GOTPtr, TGA); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, - PtrVT, GOTEntry, TGA); + SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, + PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -907,6 +907,16 @@ [(set i64:$rD, (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; +// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. 
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8,X3] in +def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD), + (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), + "#ADDItlsgdLADDR", + [(set i64:$rD, + (PPCaddiTlsgdLAddr i64:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>, + isPPC64; def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -923,6 +933,16 @@ [(set i64:$rD, (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; +// Combined op for ADDItlsldL and GETtlsldADDR, late expanded. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8,X3] in +def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD), + (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), + "#ADDItlsldLADDR", + [(set i64:$rD, + (PPCaddiTlsldLAddr i64:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>, + isPPC64; def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -111,9 +111,17 @@ def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisDtprelHA : 
SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; @@ -2513,6 +2521,15 @@ "GETtlsADDR32", [(set i32:$rD, (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; +// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR,R3] in +def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD), + (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), + "#ADDItlsgdLADDR32", + [(set i32:$rD, + (PPCaddiTlsgdLAddr i32:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>; def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, @@ -2523,6 +2540,15 @@ [(set i32:$rD, (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; +// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR,R3] in +def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD), + (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), + "#ADDItlsldLADDR32", + [(set i32:$rD, + (PPCaddiTlsldLAddr i32:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>; def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, Index: lib/Target/PowerPC/PPCTLSDynamicCall.cpp =================================================================== --- lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -7,10 +7,17 @@ // //===----------------------------------------------------------------------===// // -// This pass fixes up GETtls[ld]ADDR[32] machine instructions so that -// they read and write GPR3. These are really call instructions, so -// must use the calling convention registers. This is done in a late -// pass so that TLS variable accesses can be fully commoned. 
+// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into +// separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both of +// which define GPR3. A copy is added from GPR3 to the target virtual +// register of the original instruction. The GETtls[ld]ADDR[32] is really +// a call instruction, so its target register is constrained to be GPR3. +// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker +// optimization bug that requires the target register of the addi of +// a local- or general-dynamic TLS access sequence to be GPR3. +// +// This is done in a late pass so that TLS variable accesses can be +// fully commoned by MachineCSE. // //===----------------------------------------------------------------------===// @@ -32,11 +39,6 @@ } namespace { - // PPCTLSDynamicCall pass - Add copies to and from GPR3 around - // GETtls[ld]ADDR[32] machine instructions. These instructions - // are actually call instructions, so the register choice is - // constrained. We delay introducing these copies as late as - // possible so that TLS variable accesses can be fully commoned. struct PPCTLSDynamicCall : public MachineFunctionPass { static char ID; PPCTLSDynamicCall() : MachineFunctionPass(ID) { @@ -55,10 +57,10 @@ I != IE; ++I) { MachineInstr *MI = I; - if (MI->getOpcode() != PPC::GETtlsADDR && - MI->getOpcode() != PPC::GETtlsldADDR && - MI->getOpcode() != PPC::GETtlsADDR32 && - MI->getOpcode() != PPC::GETtlsldADDR32) + if (MI->getOpcode() != PPC::ADDItlsgdLADDR && + MI->getOpcode() != PPC::ADDItlsldLADDR && + MI->getOpcode() != PPC::ADDItlsgdLADDR32 && + MI->getOpcode() != PPC::ADDItlsldLADDR32) continue; DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;); @@ -67,14 +69,41 @@ unsigned InReg = MI->getOperand(1).getReg(); DebugLoc DL = MI->getDebugLoc(); unsigned GPR3 = Is64Bit ? 
PPC::X3 : PPC::R3; + unsigned Opc1, Opc2; + + switch (MI->getOpcode()) { + default: + llvm_unreachable("Opcode inconsistency error"); + case PPC::ADDItlsgdLADDR: + Opc1 = PPC::ADDItlsgdL; + Opc2 = PPC::GETtlsADDR; + break; + case PPC::ADDItlsldLADDR: + Opc1 = PPC::ADDItlsldL; + Opc2 = PPC::GETtlsldADDR; + break; + case PPC::ADDItlsgdLADDR32: + Opc1 = PPC::ADDItlsgdL32; + Opc2 = PPC::GETtlsADDR32; + break; + case PPC::ADDItlsldLADDR32: + Opc1 = PPC::ADDItlsldL32; + Opc2 = PPC::GETtlsldADDR32; + break; + } - BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) + MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) .addReg(InReg); - MI->getOperand(0).setReg(GPR3); - MI->getOperand(1).setReg(GPR3); + Addi->addOperand(MI->getOperand(2)); + + MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3) + .addReg(GPR3)); + Call->addOperand(MI->getOperand(3)); + BuildMI(MBB, ++I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); + MI->removeFromParent(); Changed = true; }