diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -130,12 +130,19 @@ PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : PPCTargetStreamer(S), OS(OS) {} - void emitTCEntry(const MCSymbol &S) override { + void emitTCEntry(const MCSymbol &S, + MCSymbolRefExpr::VariantKind Kind) override { if (const MCSymbolXCOFF *XSym = dyn_cast(&S)) { MCSymbolXCOFF *TCSym = cast(Streamer.getCurrentSectionOnly()) ->getQualNameSymbol(); - OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n'; + // If the variant kind is TLSGD the entry represents the region handle for + // the symbol, we prefix the name with a dot and we add the @m + // relocation specifier. + if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_TLSGD) + OS << "\t.tc ." << TCSym->getName() << "," << XSym->getName() << "@m\n"; + else + OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n'; if (TCSym->hasRename()) Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName()); @@ -172,7 +179,8 @@ return static_cast(Streamer); } - void emitTCEntry(const MCSymbol &S) override { + void emitTCEntry(const MCSymbol &S, + MCSymbolRefExpr::VariantKind Kind) override { // Creates a R_PPC64_TOC relocation Streamer.emitValueToAlignment(8); Streamer.emitSymbolValue(&S, 8); @@ -276,7 +284,8 @@ public: PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} - void emitTCEntry(const MCSymbol &S) override { + void emitTCEntry(const MCSymbol &S, + MCSymbolRefExpr::VariantKind Kind) override { llvm_unreachable("Unknown pseudo-op: .tc"); } @@ -298,7 +307,8 @@ public: PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} - void emitTCEntry(const MCSymbol &S) override { + void emitTCEntry(const MCSymbol &S, + MCSymbolRefExpr::VariantKind Kind) override { const MCAsmInfo 
*MAI = Streamer.getContext().getAsmInfo(); const unsigned PointerSize = MAI->getCodePointerSize(); Streamer.emitValueToAlignment(PointerSize); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -526,7 +526,7 @@ /// call to __tls_get_addr to the current output stream. void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK) { - StringRef Name = "__tls_get_addr"; + StringRef Name = Subtarget->isAIXABI() ? ".__tls_get_addr" : "__tls_get_addr"; MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; unsigned Opcode = PPC::BL8_NOP_TLS; @@ -548,6 +548,23 @@ (!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) && "GETtls[ld]ADDR[32] must read GPR3"); + if (Subtarget->isAIXABI()) { + // On AIX, the variable offset should already be in R4 and the region handle + // should already be in R3. + // For TLSGD, which currently is the only supported access model, we only + // need to generate an absolute branch to .__tls_get_addr. + Register VarOffsetReg = Subtarget->isPPC64() ? 
PPC::X4 : PPC::R4; + (void)VarOffsetReg; + assert(MI->getOperand(2).isReg() && + MI->getOperand(2).getReg() == VarOffsetReg && + "GETtls[ld]ADDR[32] must read GPR4"); + MCSymbol *TlsGetAddrA = OutContext.getOrCreateSymbol(Name); + const MCExpr *TlsRef = MCSymbolRefExpr::create( + TlsGetAddrA, MCSymbolRefExpr::VK_None, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef)); + return; + } + if (Subtarget->is32BitELFABI() && isPositionIndependent()) Kind = MCSymbolRefExpr::VK_PLT; @@ -638,10 +655,11 @@ auto getTOCEntryLoadingExprForXCOFF = [IsPPC64, getTOCRelocAdjustedExprForXCOFF, - this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * { + this](const MCSymbol *MOSymbol, const MCExpr *Expr, + MCSymbolRefExpr::VariantKind VK = + MCSymbolRefExpr::VariantKind::VK_None) -> const MCExpr * { const unsigned EntryByteSize = IsPPC64 ? 8 : 4; - const auto TOCEntryIter = - TOC.find({MOSymbol, MCSymbolRefExpr::VariantKind::VK_None}); + const auto TOCEntryIter = TOC.find({MOSymbol, VK}); assert(TOCEntryIter != TOC.end() && "Could not find the TOC entry for this symbol."); const ptrdiff_t EntryDistanceFromTOCBase = @@ -653,6 +671,14 @@ return Expr; }; + auto GetVKForMO = [&](const MachineOperand &MO) { + // For GD TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + if (IsAIX && (MO.getTargetFlags() & PPCII::MO_TLSGD_FLAG)) + return MCSymbolRefExpr::VariantKind::VK_PPC_TLSGD; + return MCSymbolRefExpr::VariantKind::VK_None; + }; // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { @@ -786,10 +812,15 @@ return; } + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. 
+ MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + // Otherwise, use the TOC. 'TOCEntry' is a label used to reference the // storage allocated in the TOC which contains the address of // 'MOSymbol'. Said TOC entry will be synthesized later. - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK); const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext); @@ -800,7 +831,7 @@ assert( TM.getCodeModel() == CodeModel::Small && "This pseudo should only be selected for 32-bit small code model."); - Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp); + Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp, VK); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -832,17 +863,21 @@ // Map the operand to its corresponding MCSymbol. const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + // Map the machine operand to its corresponding MCSymbol, then map the // global address operand to be a reference to the TOC entry we will // synthesize later. - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK); - const MCSymbolRefExpr::VariantKind VK = + MCSymbolRefExpr::VariantKind VKExpr = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; - const MCExpr *Exp = - MCSymbolRefExpr::create(TOCEntry, VK, OutContext); + const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, VKExpr, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr( - IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp); + IsAIX ? 
getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp, VK) : Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -864,11 +899,16 @@ // Map the machine operand to its corresponding MCSymbol. MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to // reference the storage allocated in the TOC which contains the address of // 'MOSymbol'. - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK); const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_U, OutContext); @@ -894,11 +934,16 @@ // Map the machine operand to its corresponding MCSymbol. MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to // reference the storage allocated in the TOC which contains the address of // 'MOSymbol'. 
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK); const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_L, OutContext); @@ -922,14 +967,18 @@ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + const bool GlobalToc = MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || (MO.isCPI() && TM.getCodeModel() == CodeModel::Large)) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, VK); - const MCSymbolRefExpr::VariantKind VK = - IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA; + VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA; const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, VK, OutContext); @@ -966,11 +1015,15 @@ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // For TLS access on AIX, we have two TOC entries for the symbol (one for + // the offset and the other for the region handle). They are differentiated + // by the presence of the PPCII::MO_TLSGD_FLAG. + MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO); + if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, VK); - const MCSymbolRefExpr::VariantKind VK = - IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO; + VK = IsAIX ? 
MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO; const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, VK, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); @@ -1119,6 +1172,10 @@ // Transform: %x3 = GETtlsADDR %x3, @sym // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) case PPC::GETtlsADDRPCREL: + case PPC::GETtlsADDR32AIX: + // Transform: %r3 = GETtlsADDR32AIX %r3, %r4 + // Into: BLA .__tls_get_addr() + // Unlike on Linux, there is no symbol or relocation needed for this call. case PPC::GETtlsADDR32: { // Transform: %r3 = GETtlsADDR32 %r3, @sym // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT @@ -1546,7 +1603,7 @@ OutStreamer->emitLabel(TOCEntryLabel); if (isPPC64 && TS != nullptr) - TS->emitTCEntry(*TOCEntryTarget); + TS->emitTCEntry(*TOCEntryTarget, TOCMapPair.first.second); else OutStreamer->emitSymbolValue(TOCEntryTarget, 4); } @@ -2199,7 +2256,7 @@ OutStreamer->emitLabel(I.second); if (TS != nullptr) - TS->emitTCEntry(*I.first.first); + TS->emitTCEntry(*I.first.first, I.first.second); } } @@ -2273,6 +2330,11 @@ switch (MI->getOpcode()) { default: break; + case PPC::GETtlsADDR32AIX: { + MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(".__tls_get_addr"); + ExtSymSDNodeSymbols.insert(TlsGetAddr); + break; + } case PPC::BL8: case PPC::BL: case PPC::BL8_NOP: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -355,6 +355,15 @@ /// register assignment. ADDI_TLSGD_L_ADDR, + /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY + /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY + /// Op that combines two register copies of TOC entries + /// (region handle into R3 and variable offset into R4) followed by a + /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr. + /// This node is used in 64-bit mode as well (in which case the result is + /// G8RC and inputs are X3/X4). 
+ TLSGD_AIX, + /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. @@ -1123,6 +1132,8 @@ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1572,6 +1572,7 @@ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; + case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; @@ -3118,7 +3119,42 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) - report_fatal_error("TLS is not yet supported on AIX."); + return LowerGlobalTLSAddressAIX(Op, DAG); + + return LowerGlobalTLSAddressLinux(Op, DAG); +} + +SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, + SelectionDAG &DAG) const { + GlobalAddressSDNode *GA = cast(Op); + + if (DAG.getTarget().useEmulatedTLS()) + report_fatal_error("Emulated TLS is not yet supported on AIX"); + + if (Subtarget.isPPC64()) + report_fatal_error("TLS is not yet supported on AIX 
PPC64"); + + SDLoc dl(GA); + const GlobalValue *GV = GA->getGlobal(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // The general-dynamic model is the only access model supported for now, so + // all the GlobalTLSAddress nodes are lowered with this model. + // We need to generate two TOC entries, one for the variable offset, one for + // the region handle. The global address for the TOC entry of the region + // handle is created with the MO_TLSGD_FLAG flag so we can easily identify + // this entry and add the right relocation. + SDValue VariableOffsetTGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue RegionHandleTGA = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG); + SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA); + SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA); + return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset, + RegionHandle); +} + +SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op, + SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -218,6 +218,7 @@ SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; +def PPCTlsgdAIX : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; @@ -3419,6 +3420,15 @@ "GETtlsADDR32", [(set i32:$rD, (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; +// R3 is explicitly defined when this op is created, so not mentioned here. 
+// The rest of the Defs are the exact set of registers that will be clobbered by +// the call. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R0,R4,R5,R11,LR,CR0] in +def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle), + "GETtlsADDR32AIX", + [(set i32:$rD, + (PPCgetTlsAddr i32:$offset, i32:$handle))]>; // Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR // are true defines while the rest of the Defs are clobbers. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, @@ -3434,6 +3444,12 @@ "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +// This pseudo is expanded to two copies to put the variable offset in R4 and +// the region handle in R3 and GETtlsADDR32AIX. +def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle), + "#TLSGDAIX", + [(set i32:$rD, + (PPCTlsgdAIX i32:$offset, i32:$handle))]>; // LR is a true define, while the rest of the Defs are clobbers. R3 is // explicitly defined when this op is created, so not mentioned here. 
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -50,6 +50,7 @@ bool Changed = false; bool NeedFence = true; bool Is64Bit = MBB.getParent()->getSubtarget().isPPC64(); + bool IsAIX = MBB.getParent()->getSubtarget().isAIXABI(); bool IsPCREL = false; for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); @@ -60,7 +61,8 @@ if (MI.getOpcode() != PPC::ADDItlsgdLADDR && MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && - MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) { + MI.getOpcode() != PPC::ADDItlsldLADDR32 && + MI.getOpcode() != PPC::TLSGDAIX && !IsPCREL) { // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP // as scheduling fences, we skip creating fences if we already // have existing ADJCALLSTACKDOWN/UP to avoid nesting, @@ -79,6 +81,7 @@ Register OutReg = MI.getOperand(0).getReg(); Register InReg = PPC::NoRegister; Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; + Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; SmallVector OrigRegs = {OutReg, GPR3}; if (!IsPCREL) { InReg = MI.getOperand(1).getReg(); @@ -106,6 +109,11 @@ Opc1 = PPC::ADDItlsldL32; Opc2 = PPC::GETtlsldADDR32; break; + case PPC::TLSGDAIX: + // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only + // set Opc2 here. + Opc2 = PPC::GETtlsADDR32AIX; + break; case PPC::PADDI8pc: assert(IsPCREL && "Expecting General/Local Dynamic PCRel"); Opc1 = PPC::PADDI8pc; @@ -125,29 +133,38 @@ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) .addImm(0); - MachineInstr *Addi; - if (IsPCREL) { - Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0); - } else { - // Expand into two ops built prior to the existing instruction. 
- assert(InReg != PPC::NoRegister && "Operand must be a register"); - Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg); - } - - Addi->addOperand(MI.getOperand(2)); - // The ADDItls* instruction is the first instruction in the // repair range. MachineBasicBlock::iterator First = I; --First; - MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3) - .addReg(GPR3)); - if (IsPCREL) - Call->addOperand(MI.getOperand(2)); - else - Call->addOperand(MI.getOperand(3)); - + if (IsAIX) { + // The variable offset and region handle are copied in r4 and r3. The + // copies are followed by the GETtlsADDR32AIX instruction. + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4) + .addReg(MI.getOperand(1).getReg()); + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) + .addReg(MI.getOperand(2).getReg()); + BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4); + } else { + MachineInstr *Addi; + if (IsPCREL) { + Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0); + } else { + // Expand into two ops built prior to the existing instruction. 
+ assert(InReg != PPC::NoRegister && "Operand must be a register"); + Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg); + } + + Addi->addOperand(MI.getOperand(2)); + + MachineInstr *Call = + (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3)); + if (IsPCREL) + Call->addOperand(MI.getOperand(2)); + else + Call->addOperand(MI.getOperand(3)); + } if (NeedFence) BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); diff --git a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h --- a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h @@ -23,7 +23,8 @@ PPCTargetStreamer(MCStreamer &S); ~PPCTargetStreamer() override; - virtual void emitTCEntry(const MCSymbol &S) = 0; + virtual void emitTCEntry(const MCSymbol &S, + MCSymbolRefExpr::VariantKind Kind) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0; diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-checks.ll b/llvm/test/CodeGen/PowerPC/aix-tls-checks.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-checks.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-checks.ll @@ -1,9 +1,7 @@ ; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ -; RUN: -mtriple powerpc-ibm-aix-xcoff < %s - 2>&1 | FileCheck %s -; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s - 2>&1 | FileCheck %s -; CHECK: TLS is not yet supported on AIX +; CHECK: TLS is not yet supported on AIX PPC64 @tls1 = thread_local global i32 0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 + +@TGInit = thread_local global double 1.000000e+00, align 8 +@TWInit = weak thread_local global double 1.000000e+00, align 8 +@GInit = global double 1.000000e+00, align 8 +@TGUninit = thread_local global double 0.000000e+00, align 8 +@TIInit = internal thread_local global double 1.000000e+00, align 8 + +; Function Attrs: noinline nounwind optnone +define void @storesTGUninit(double %Val) #0 { +; SMALL32-LABEL: storesTGUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stfd 1, 32(31) +; SMALL32-NEXT: lfd 0, 32(31) +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stfd 0, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTGUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stfd 1, 32(31) +; LARGE32-NEXT: lfd 0, 32(31) +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stfd 0, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca double, align 8 + store double %Val, double* %Val.addr, align 8 + %0 = load double, double* 
%Val.addr, align 8 + store double %0, double* @TGUninit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTGInit(double %Val) #0 { +; SMALL32-LABEL: storesTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stfd 1, 32(31) +; SMALL32-NEXT: lfd 0, 32(31) +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stfd 0, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stfd 1, 32(31) +; LARGE32-NEXT: lfd 0, 32(31) +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stfd 0, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca double, align 8 + store double %Val, double* %Val.addr, align 8 + %0 = load double, double* %Val.addr, align 8 + store double %0, double* @TGInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTIInit(double %Val) #0 { +; SMALL32-LABEL: storesTIInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stfd 1, 32(31) +; SMALL32-NEXT: lfd 0, 32(31) +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla .__tls_get_addr +; 
SMALL32-NEXT: stfd 0, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTIInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stfd 1, 32(31) +; LARGE32-NEXT: lfd 0, 32(31) +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stfd 0, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca double, align 8 + store double %Val, double* %Val.addr, align 8 + %0 = load double, double* %Val.addr, align 8 + store double %0, double* @TIInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTWInit(double %Val) #0 { +; SMALL32-LABEL: storesTWInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stfd 1, 32(31) +; SMALL32-NEXT: lfd 0, 32(31) +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stfd 0, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTWInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stfd 1, 32(31) +; LARGE32-NEXT: lfd 0, 32(31) +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; 
LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stfd 0, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca double, align 8 + store double %Val, double* %Val.addr, align 8 + %0 = load double, double* %Val.addr, align 8 + store double %0, double* @TWInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define double @loadsTGUninit() #0 { +; SMALL32-LABEL: loadsTGUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lfd 0, 0(3) +; SMALL32-NEXT: lwz 3, L..C8(2) +; SMALL32-NEXT: lfd 1, 0(3) +; SMALL32-NEXT: fadd 1, 0, 1 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTGUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lfd 0, 0(3) +; LARGE32-NEXT: addis 3, L..C8@u(2) +; LARGE32-NEXT: lwz 3, L..C8@l(3) +; LARGE32-NEXT: lfd 1, 0(3) +; LARGE32-NEXT: fadd 1, 0, 1 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load double, double* @TGUninit, align 8 + %1 = load double, double* @GInit, align 8 + %add = fadd double %0, %1 + ret double %add +} + +; Function Attrs: noinline nounwind optnone +define double @loadsTGInit() #0 { +; 
SMALL32-LABEL: loadsTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lfd 0, 0(3) +; SMALL32-NEXT: lwz 3, L..C8(2) +; SMALL32-NEXT: lfd 1, 0(3) +; SMALL32-NEXT: fadd 1, 0, 1 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lfd 0, 0(3) +; LARGE32-NEXT: addis 3, L..C8@u(2) +; LARGE32-NEXT: lwz 3, L..C8@l(3) +; LARGE32-NEXT: lfd 1, 0(3) +; LARGE32-NEXT: fadd 1, 0, 1 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load double, double* @TGInit, align 8 + %1 = load double, double* @GInit, align 8 + %add = fadd double %0, %1 + ret double %add +} + +; Function Attrs: noinline nounwind optnone +define double @loadsTIInit() #0 { +; SMALL32-LABEL: loadsTIInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lfd 0, 0(3) +; SMALL32-NEXT: lwz 3, L..C8(2) +; SMALL32-NEXT: lfd 1, 0(3) +; SMALL32-NEXT: fadd 1, 0, 1 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; 
SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTIInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lfd 0, 0(3) +; LARGE32-NEXT: addis 3, L..C8@u(2) +; LARGE32-NEXT: lwz 3, L..C8@l(3) +; LARGE32-NEXT: lfd 1, 0(3) +; LARGE32-NEXT: fadd 1, 0, 1 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load double, double* @TIInit, align 8 + %1 = load double, double* @GInit, align 8 + %add = fadd double %0, %1 + ret double %add +} + +; Function Attrs: noinline nounwind optnone +define double @loadsTWInit() #0 { +; SMALL32-LABEL: loadsTWInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lfd 0, 0(3) +; SMALL32-NEXT: lwz 3, L..C8(2) +; SMALL32-NEXT: lfd 1, 0(3) +; SMALL32-NEXT: fadd 1, 0, 1 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTWInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lfd 0, 0(3) +; LARGE32-NEXT: addis 3, L..C8@u(2) +; LARGE32-NEXT: lwz 3, 
L..C8@l(3) +; LARGE32-NEXT: lfd 1, 0(3) +; LARGE32-NEXT: fadd 1, 0, 1 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load double, double* @TWInit, align 8 + %1 = load double, double* @GInit, align 8 + %add = fadd double %0, %1 + ret double %add +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 + +@TGInit = thread_local global i32 1, align 4 +@GInit = global i32 1, align 4 +@TGUninit = thread_local global i32 0, align 4 +@TIUninit = internal thread_local global i32 0, align 4 +@TWUninit = weak thread_local global i32 0, align 4 + +; Function Attrs: noinline nounwind optnone +define void @storesTGUninit(i32 %Val) #0 { +; SMALL32-LABEL: storesTGUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 3, 24(31) +; SMALL32-NEXT: lwz 6, 24(31) +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; 
SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTGUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 3, 24(31) +; LARGE32-NEXT: lwz 6, 24(31) +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i32, align 4 + store i32 %Val, i32* %Val.addr, align 4 + %0 = load i32, i32* %Val.addr, align 4 + store i32 %0, i32* @TGUninit, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTGInit(i32 %Val) #0 { +; SMALL32-LABEL: storesTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 3, 24(31) +; SMALL32-NEXT: lwz 6, 24(31) +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 3, 24(31) +; LARGE32-NEXT: lwz 6, 24(31) +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; 
LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i32, align 4 + store i32 %Val, i32* %Val.addr, align 4 + %0 = load i32, i32* %Val.addr, align 4 + store i32 %0, i32* @TGInit, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTIUninit(i32 %Val) #0 { +; SMALL32-LABEL: storesTIUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 3, 24(31) +; SMALL32-NEXT: lwz 6, 24(31) +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTIUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 3, 24(31) +; LARGE32-NEXT: lwz 6, 24(31) +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i32, align 4 + store i32 %Val, i32* %Val.addr, align 4 + %0 = load i32, i32* %Val.addr, align 4 + store i32 %0, i32* @TIUninit, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTWUninit(i32 %Val) #0 { +; SMALL32-LABEL: storesTWUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 
31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 3, 24(31) +; SMALL32-NEXT: lwz 6, 24(31) +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTWUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 3, 24(31) +; LARGE32-NEXT: lwz 6, 24(31) +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i32, align 4 + store i32 %Val, i32* %Val.addr, align 4 + %0 = load i32, i32* %Val.addr, align 4 + store i32 %0, i32* @TWUninit, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone +define i32 @loadsTGUninit() #0 { +; SMALL32-LABEL: loadsTGUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 4, L..C8(2) +; SMALL32-NEXT: lwz 4, 0(4) +; SMALL32-NEXT: add 3, 3, 4 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTGUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; 
LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 4, L..C8@u(2) +; LARGE32-NEXT: lwz 4, L..C8@l(4) +; LARGE32-NEXT: lwz 4, 0(4) +; LARGE32-NEXT: add 3, 3, 4 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i32, i32* @TGUninit, align 4 + %1 = load i32, i32* @GInit, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind optnone +define i32 @loadsTGInit() #0 { +; SMALL32-LABEL: loadsTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 4, L..C8(2) +; SMALL32-NEXT: lwz 4, 0(4) +; SMALL32-NEXT: add 3, 3, 4 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 4, L..C8@u(2) +; LARGE32-NEXT: lwz 4, L..C8@l(4) +; LARGE32-NEXT: lwz 4, 0(4) +; LARGE32-NEXT: add 3, 3, 4 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; 
LARGE32-NEXT: blr +entry: + %0 = load i32, i32* @TGInit, align 4 + %1 = load i32, i32* @GInit, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind optnone +define i32 @loadsTIUninit() #0 { +; SMALL32-LABEL: loadsTIUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 4, L..C8(2) +; SMALL32-NEXT: lwz 4, 0(4) +; SMALL32-NEXT: add 3, 3, 4 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTIUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 4, L..C8@u(2) +; LARGE32-NEXT: lwz 4, L..C8@l(4) +; LARGE32-NEXT: lwz 4, 0(4) +; LARGE32-NEXT: add 3, 3, 4 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i32, i32* @TIUninit, align 4 + %1 = load i32, i32* @GInit, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind optnone +define i32 @loadsTWUninit() #0 { +; SMALL32-LABEL: loadsTWUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr 
+; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 4, L..C8(2) +; SMALL32-NEXT: lwz 4, 0(4) +; SMALL32-NEXT: add 3, 3, 4 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTWUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 4, L..C8@u(2) +; LARGE32-NEXT: lwz 4, L..C8@l(4) +; LARGE32-NEXT: lwz 4, 0(4) +; LARGE32-NEXT: add 3, 3, 4 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i32, i32* @TWUninit, align 4 + %1 = load i32, i32* @GInit, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll @@ -0,0 +1,463 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | 
FileCheck %s --check-prefix=LARGE32 + +@TGInit = thread_local global i64 1, align 8 +@TWInit = weak thread_local global i64 1, align 8 +@GInit = global i64 1, align 8 +@TIUninit = internal thread_local global i64 0, align 8 +@TIInit = internal thread_local global i64 1, align 8 + +; Function Attrs: noinline nounwind optnone +define void @storesTGInit(i64 %Val) #0 { +; SMALL32-LABEL: storesTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 4, 36(31) +; SMALL32-NEXT: stw 3, 32(31) +; SMALL32-NEXT: lwz 6, 32(31) +; SMALL32-NEXT: lwz 7, 36(31) +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 7, 4(3) +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 4, 36(31) +; LARGE32-NEXT: stw 3, 32(31) +; LARGE32-NEXT: lwz 6, 32(31) +; LARGE32-NEXT: lwz 7, 36(31) +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 7, 4(3) +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i64, align 8 + store i64 %Val, i64* %Val.addr, align 8 + %0 = load i64, i64* %Val.addr, align 8 + store i64 %0, i64* @TGInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTIUninit(i64 %Val) #0 { +; SMALL32-LABEL: 
storesTIUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 4, 36(31) +; SMALL32-NEXT: stw 3, 32(31) +; SMALL32-NEXT: lwz 6, 32(31) +; SMALL32-NEXT: lwz 7, 36(31) +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 7, 4(3) +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTIUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 4, 36(31) +; LARGE32-NEXT: stw 3, 32(31) +; LARGE32-NEXT: lwz 6, 32(31) +; LARGE32-NEXT: lwz 7, 36(31) +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 7, 4(3) +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i64, align 8 + store i64 %Val, i64* %Val.addr, align 8 + %0 = load i64, i64* %Val.addr, align 8 + store i64 %0, i64* @TIUninit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTIInit(i64 %Val) #0 { +; SMALL32-LABEL: storesTIInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 4, 36(31) +; SMALL32-NEXT: stw 3, 32(31) +; SMALL32-NEXT: lwz 6, 32(31) +; SMALL32-NEXT: lwz 7, 36(31) +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla 
.__tls_get_addr +; SMALL32-NEXT: stw 7, 4(3) +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTIInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 4, 36(31) +; LARGE32-NEXT: stw 3, 32(31) +; LARGE32-NEXT: lwz 6, 32(31) +; LARGE32-NEXT: lwz 7, 36(31) +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 7, 4(3) +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i64, align 8 + store i64 %Val, i64* %Val.addr, align 8 + %0 = load i64, i64* %Val.addr, align 8 + store i64 %0, i64* @TIInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @storesTWInit(i64 %Val) #0 { +; SMALL32-LABEL: storesTWInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -48(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: stw 4, 36(31) +; SMALL32-NEXT: stw 3, 32(31) +; SMALL32-NEXT: lwz 6, 32(31) +; SMALL32-NEXT: lwz 7, 36(31) +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: stw 7, 4(3) +; SMALL32-NEXT: stw 6, 0(3) +; SMALL32-NEXT: addi 1, 1, 48 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storesTWInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -48(1) +; 
LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: stw 4, 36(31) +; LARGE32-NEXT: stw 3, 32(31) +; LARGE32-NEXT: lwz 6, 32(31) +; LARGE32-NEXT: lwz 7, 36(31) +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: stw 7, 4(3) +; LARGE32-NEXT: stw 6, 0(3) +; LARGE32-NEXT: addi 1, 1, 48 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %Val.addr = alloca i64, align 8 + store i64 %Val, i64* %Val.addr, align 8 + %0 = load i64, i64* %Val.addr, align 8 + store i64 %0, i64* @TWInit, align 8 + ret void +} + +; Function Attrs: noinline nounwind optnone +define i64 @loadsTGInit() #0 { +; SMALL32-LABEL: loadsTGInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C0(2) +; SMALL32-NEXT: lwz 4, L..C1(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 4, 4(3) +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 5, L..C8(2) +; SMALL32-NEXT: lwz 6, 4(5) +; SMALL32-NEXT: lwz 5, 0(5) +; SMALL32-NEXT: addc 4, 4, 6 +; SMALL32-NEXT: adde 3, 3, 5 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTGInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C0@u(2) +; LARGE32-NEXT: lwz 3, L..C0@l(3) +; LARGE32-NEXT: addis 4, L..C1@u(2) +; LARGE32-NEXT: lwz 4, L..C1@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 4, 4(3) +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 5, L..C8@u(2) +; LARGE32-NEXT: lwz 5, L..C8@l(5) +; LARGE32-NEXT: lwz 6, 4(5) +; LARGE32-NEXT: lwz 5, 
0(5) +; LARGE32-NEXT: addc 4, 4, 6 +; LARGE32-NEXT: adde 3, 3, 5 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i64, i64* @TGInit, align 8 + %1 = load i64, i64* @GInit, align 8 + %add = add nsw i64 %0, %1 + ret i64 %add +} + +; Function Attrs: noinline nounwind optnone +define i64 @loadsTIUninit() #0 { +; SMALL32-LABEL: loadsTIUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C2(2) +; SMALL32-NEXT: lwz 4, L..C3(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 4, 4(3) +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 5, L..C8(2) +; SMALL32-NEXT: lwz 6, 4(5) +; SMALL32-NEXT: lwz 5, 0(5) +; SMALL32-NEXT: addc 4, 4, 6 +; SMALL32-NEXT: adde 3, 3, 5 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTIUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C2@u(2) +; LARGE32-NEXT: lwz 3, L..C2@l(3) +; LARGE32-NEXT: addis 4, L..C3@u(2) +; LARGE32-NEXT: lwz 4, L..C3@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 4, 4(3) +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 5, L..C8@u(2) +; LARGE32-NEXT: lwz 5, L..C8@l(5) +; LARGE32-NEXT: lwz 6, 4(5) +; LARGE32-NEXT: lwz 5, 0(5) +; LARGE32-NEXT: addc 4, 4, 6 +; LARGE32-NEXT: adde 3, 3, 5 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i64, i64* @TIUninit, align 8 + %1 = load i64, i64* @GInit, align 8 + %add = add nsw i64 %0, %1 + ret i64 %add +} + +; Function Attrs: 
noinline nounwind optnone +define i64 @loadsTIInit() #0 { +; SMALL32-LABEL: loadsTIInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C4(2) +; SMALL32-NEXT: lwz 4, L..C5(2) +; SMALL32-NEXT: bla .__tls_get_addr +; SMALL32-NEXT: lwz 4, 4(3) +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 5, L..C8(2) +; SMALL32-NEXT: lwz 6, 4(5) +; SMALL32-NEXT: lwz 5, 0(5) +; SMALL32-NEXT: addc 4, 4, 6 +; SMALL32-NEXT: adde 3, 3, 5 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTIInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C4@u(2) +; LARGE32-NEXT: lwz 3, L..C4@l(3) +; LARGE32-NEXT: addis 4, L..C5@u(2) +; LARGE32-NEXT: lwz 4, L..C5@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 4, 4(3) +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 5, L..C8@u(2) +; LARGE32-NEXT: lwz 5, L..C8@l(5) +; LARGE32-NEXT: lwz 6, 4(5) +; LARGE32-NEXT: lwz 5, 0(5) +; LARGE32-NEXT: addc 4, 4, 6 +; LARGE32-NEXT: adde 3, 3, 5 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i64, i64* @TIInit, align 8 + %1 = load i64, i64* @GInit, align 8 + %add = add nsw i64 %0, %1 + ret i64 %add +} + +; Function Attrs: noinline nounwind optnone +define i64 @loadsTWInit() #0 { +; SMALL32-LABEL: loadsTWInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr 0 +; SMALL32-NEXT: stw 31, -4(1) +; SMALL32-NEXT: stw 0, 8(1) +; SMALL32-NEXT: stwu 1, -32(1) +; SMALL32-NEXT: mr 31, 1 +; SMALL32-NEXT: lwz 3, L..C6(2) +; SMALL32-NEXT: lwz 4, L..C7(2) +; SMALL32-NEXT: bla .__tls_get_addr +; 
SMALL32-NEXT: lwz 4, 4(3) +; SMALL32-NEXT: lwz 3, 0(3) +; SMALL32-NEXT: lwz 5, L..C8(2) +; SMALL32-NEXT: lwz 6, 4(5) +; SMALL32-NEXT: lwz 5, 0(5) +; SMALL32-NEXT: addc 4, 4, 6 +; SMALL32-NEXT: adde 3, 3, 5 +; SMALL32-NEXT: addi 1, 1, 32 +; SMALL32-NEXT: lwz 0, 8(1) +; SMALL32-NEXT: lwz 31, -4(1) +; SMALL32-NEXT: mtlr 0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadsTWInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr 0 +; LARGE32-NEXT: stw 31, -4(1) +; LARGE32-NEXT: stw 0, 8(1) +; LARGE32-NEXT: stwu 1, -32(1) +; LARGE32-NEXT: mr 31, 1 +; LARGE32-NEXT: addis 3, L..C6@u(2) +; LARGE32-NEXT: lwz 3, L..C6@l(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: bla .__tls_get_addr +; LARGE32-NEXT: lwz 4, 4(3) +; LARGE32-NEXT: lwz 3, 0(3) +; LARGE32-NEXT: addis 5, L..C8@u(2) +; LARGE32-NEXT: lwz 5, L..C8@l(5) +; LARGE32-NEXT: lwz 6, 4(5) +; LARGE32-NEXT: lwz 5, 0(5) +; LARGE32-NEXT: addc 4, 4, 6 +; LARGE32-NEXT: adde 3, 3, 5 +; LARGE32-NEXT: addi 1, 1, 32 +; LARGE32-NEXT: lwz 0, 8(1) +; LARGE32-NEXT: lwz 31, -4(1) +; LARGE32-NEXT: mtlr 0 +; LARGE32-NEXT: blr +entry: + %0 = load i64, i64* @TWInit, align 8 + %1 = load i64, i64* @GInit, align 8 + %add = add nsw i64 %0, %1 + ret i64 %add +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" }