diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -1296,11 +1296,9 @@ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD, LDBRX, DFLOADf32, DFLOADf64, LFD, - LFDX, XFLOADf32, XFLOADf64, LFIWAX, LIWAX, LFIWZX, LIWZX, LHA, LHA8, - LHAX, LHAX8, LHBRX, LHBRX8, LHZ, LHZ8, LVEBX, @@ -1309,7 +1307,7 @@ LVX, LVXL, LWA, LWA_32, - LWAX, LWAX_32, + LWAX, LWAXTLS, LWAXTLS_32, LWAX_32, LWBRX, LWBRX8, LWZ, LWZ8, LWZtoc, LWZtocL, LXSD, @@ -1340,6 +1338,8 @@ ICBT, LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32, LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX, + LFDX, LFDXTLS, XFLOADf32, XFLOADf64, + LHAX, LHAX8, LHAXTLS, LHAXTLS_32, LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32, LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32, LXVL, @@ -1442,11 +1442,17 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], (instrs LFS, - LFSX, LXSSP, LXSSPX )>; +// 2-way crack instructions +// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], + (instrs + LFSX, LFSXTLS +)>; + // 4-way crack instructions // 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY], @@ -1823,12 +1829,10 @@ DFSTOREf32, DFSTOREf64, STFD, STFDU, STFDUX, - STFDX, STFIWX, STIWX, STFS, STFSU, STFSUX, - STFSX, STH, STH8, STHBRX, STHU, STHU8, @@ -1867,6 +1871,8 @@ CP_COPY, CP_COPY8, STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32, SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_, + STFDX, STFDXTLS, + STFSX, STFSXTLS, STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32, STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32, STXVL, diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td --- 
a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -765,6 +765,7 @@
       (instrs
     LFIWZX,
     LFDX,
+    LFDXTLS,
     LFD
 )>;
 
@@ -815,9 +816,9 @@
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C],
       (instrs
-    (instregex "LHA(X)?(8)?$"),
+    (instregex "LHA(X)?(TLS)?(8)?(_32)?$"),
     (instregex "CP_PASTE(8)?_rec$"),
-    (instregex "LWA(X)?(_32)?$"),
+    (instregex "LWA(X)?(TLS)?(_32)?$"),
     TCHECK
 )>;
 
@@ -850,6 +851,7 @@
               DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFSX,
+    LFSXTLS,
     LFS
 )>;
 
@@ -891,7 +893,7 @@
 // all three dispatches for the superslice.
 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
-    (instregex "STF(S|D|IWX|SX|DX)$"),
+    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
     (instregex "STW(8)?$"),
     (instregex "(D|X)FSTORE(f32|f64)$"),
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -704,6 +704,48 @@
   return false;
 }
 
+// canOptimizeTLSDFormToXFormOnAIX - For TLS local-exec accesses on AIX 64-bit
+// and 32-bit mode, an ADD_TLS node is produced to add the result of loading the
+// variable offset to the thread pointer (X13 on 64-bit, or in R3 on 32-bit
+// after calling .__get_tpointer).
+// This ADD_TLS, followed by a D-Form memory operation, can be optimized to use
+// an X-Form load or store, allowing the ADD_TLS node to be removed completely.
+// This can be done as long as the memory operation's immediate offset is 0 and
+// that the thread pointer is being added to.
+static bool canOptimizeTLSDFormToXFormOnAIX(SelectionDAG *CurDAG,
+                                            SDValue Base, uint64_t Offset) {
+  const PPCSubtarget &Subtarget =
+      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+  if (Subtarget.isAIXABI()) {
+    // Do not do this transformation at -O0.
+    if (CurDAG->getTarget().getOptLevel() == CodeGenOpt::None)
+      return false;
+
+    // The optimization to convert the D-Form load/store into its X-Form
+    // counterpart should only occur if the immediate offset is 0.
+    if (Offset != 0)
+      return false;
+
+    // This optimization should also only be performed if the thread pointer
+    // is being added to the variable offset (loaded from the TOC).
+    // As seen in tryTLSXForm[Load|Store], Base is an ADD_TLS node.
+
+    // The first operand in this instruction should either be:
+    //  1. A GET_TPOINTER PPCISD node. This represents a call to
+    //     .__get_tpointer to get the thread pointer, which is for 32-bit only.
+    if (Base.getOperand(0).getOpcode() == PPCISD::GET_TPOINTER)
+      return true;
+    //  2. The thread pointer, stored in X13 on 64-bit mode.
+    RegisterSDNode *AddFirstOpReg =
+        dyn_cast_or_null<RegisterSDNode>(Base.getOperand(0).getNode());
+    if (!AddFirstOpReg)
+      return false;
+    if (AddFirstOpReg->getReg() != PPC::X13)
+      return false;
+  }
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
   SDValue Base = ST->getBasePtr();
   if (Base.getOpcode() != PPCISD::ADD_TLS)
@@ -713,6 +755,9 @@
     return false;
   if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
     return false;
+  // Check if the local-exec sequence on AIX can be optimized.
+  if (!canOptimizeTLSDFormToXFormOnAIX(CurDAG, Base, ST->getSrcValueOffset()))
+    return false;
 
   SDLoc dl(ST);
   EVT MemVT = ST->getMemoryVT();
@@ -738,6 +783,14 @@
     Opcode = PPC::STDXTLS;
     break;
   }
+  case MVT::f32: {
+    Opcode = PPC::STFSXTLS;
+    break;
+  }
+  case MVT::f64: {
+    Opcode = PPC::STFDXTLS;
+    break;
+  }
   }
   SDValue Chain = ST->getChain();
   SDVTList VTs = ST->getVTList();
@@ -758,10 +811,14 @@
     return false;
   if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
     return false;
+  // Check if the local-exec sequence on AIX can be optimized.
+  if (!canOptimizeTLSDFormToXFormOnAIX(CurDAG, Base, LD->getSrcValueOffset()))
+    return false;
 
   SDLoc dl(LD);
   EVT MemVT = LD->getMemoryVT();
   EVT RegVT = LD->getValueType(0);
+  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
   unsigned Opcode;
   switch (MemVT.getSimpleVT().SimpleTy) {
   default:
@@ -771,17 +828,31 @@
     break;
   }
   case MVT::i16: {
-    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+    if (isSExt)
+      Opcode = (RegVT == MVT::i32) ? PPC::LHAXTLS_32 : PPC::LHAXTLS;
+    else
+      Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
     break;
   }
   case MVT::i32: {
-    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+    if (isSExt)
+      Opcode = (RegVT == MVT::i32) ? PPC::LWAXTLS_32 : PPC::LWAXTLS;
+    else
+      Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
     break;
   }
   case MVT::i64: {
     Opcode = PPC::LDXTLS;
     break;
   }
+  case MVT::f32: {
+    Opcode = PPC::LFSXTLS;
+    break;
+  }
+  case MVT::f64: {
+    Opcode = PPC::LFDXTLS;
+    break;
+  }
   }
   SDValue Chain = LD->getChain();
   SDVTList VTs = LD->getVTList();
@@ -5405,9 +5476,10 @@
   }
 
   case ISD::STORE: {
-    // Change TLS initial-exec D-form stores to X-form stores.
+    // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
+    // X-form stores.
     StoreSDNode *ST = cast<StoreSDNode>(N);
-    if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
+    if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
         ST->getAddressingMode() != ISD::PRE_INC)
       if (tryTLSXFormStore(ST))
         return;
@@ -5420,8 +5492,9 @@
     // Normal loads are handled by code generated from the .td file.
 
     if (LD->getAddressingMode() != ISD::PRE_INC) {
-      // Change TLS initial-exec D-form loads to X-form loads.
-      if (EnableTLSOpt && Subtarget->isELFv2ABI())
+      // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
+      // X-form loads.
+ if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI())) if (tryTLSXFormLoad(LD)) return; break; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -724,18 +724,32 @@ "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>; def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lhax $RST, $RA, $RB", IIC_LdStLoad, []>; def LWZXTLS : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lwax $RST, $RA, $RB", IIC_LdStLoad, []>; def LDXTLS : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64; def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>; def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lhax $RST, $RA, $RB", IIC_LdStLoad, []>; def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lwax $RST, $RA, $RB", IIC_LdStLoad, []>; } +let mayLoad = 1, Predicates = [HasFPU] in { +def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>; +def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>; +} let mayStore = 1 in { def STBXTLS 
: XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), @@ -761,6 +775,14 @@ PPC970_DGroup_Cracked; } +let mayStore = 1, Predicates = [HasFPU] in { +def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), + "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>, + PPC970_DGroup_Cracked; +def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), + "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>, + PPC970_DGroup_Cracked; +} let isCommutable = 1 in defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll @@ -11,6 +11,18 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32-O0 @ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8 @VarInit = global double 8.787000e+01, align 8 @@ -23,16 +35,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # 
target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -42,8 +52,7 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -57,12 +66,55 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 
+; SMALL32-O0-NEXT: stfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -73,16 +125,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -92,8 +142,7 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -107,12 +156,55 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) 
; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -123,16 +215,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # 
%entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -142,8 +232,7 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -157,12 +246,55 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLUninit: +; LARGE32-O0: # %bb.0: # 
%entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -173,16 +305,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -192,8 +322,7 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -207,12 +336,55 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, 
r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -223,16 +395,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ 
-240,10 +410,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -257,12 +426,55 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, 
L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -274,9 +486,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -286,9 +497,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -298,11 +508,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -317,8 +526,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -327,6 +535,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; 
LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: xsadddp f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: xsadddp f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 
32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -339,16 +605,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -356,10 +620,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -373,12 +636,55 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: 
add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -390,9 +696,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -402,9 +707,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -414,11 +718,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; 
SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -433,8 +736,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -443,6 +745,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # 
@VarInit +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: xsadddp f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: xsadddp f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -455,16 +815,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -472,10 +830,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 
@@ -489,12 +846,55 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -506,9 +906,8 @@ ; SMALL64: # %bb.0: # %entry 
; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -518,9 +917,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -530,11 +928,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -549,8 +946,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -559,6 +955,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: 
# %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: xsadddp f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: xsadddp f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -571,16 +1025,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 
; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -588,10 +1040,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -605,12 +1056,55 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; 
SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -622,9 +1116,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -634,9 +1127,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -646,11 +1138,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -665,8 +1156,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: 
bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -675,6 +1165,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfd f1, 0(r3) +; SMALL32-O0-NEXT: xsadddp f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla 
.__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfd f1, 0(r3) +; LARGE32-O0-NEXT: xsadddp f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll @@ -11,6 +11,18 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32-O0 @ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4 @VarInit = global float 0x4021666660000000, align 4 @@ -23,16 +35,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: 
stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -42,8 +52,7 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -57,12 +66,55 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 
8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -73,16 +125,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -92,8 +142,7 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -107,12 +156,55 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # 
%bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -123,16 +215,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, 
r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -142,8 +232,7 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -157,12 +246,55 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; 
LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -173,16 +305,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -192,8 +322,7 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -207,12 +336,55 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; 
LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -223,16 +395,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -240,10 +410,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # 
target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -257,12 +426,55 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, 
r3, r4 +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -274,9 +486,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -286,9 +497,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -298,11 +508,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -317,8 +526,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -327,6 +535,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; 
SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: fadds f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: fadds f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr 
@llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -339,16 +605,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -356,10 +620,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -373,12 +636,55 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit: +; SMALL32-O0: # 
%bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -390,9 +696,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -402,9 +707,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -414,11 +718,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, 
r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -433,8 +736,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -443,6 +745,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: fadds f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; 
SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: fadds f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -455,16 +815,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -472,10 +830,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -489,12 +846,55 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add 
r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -506,9 +906,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; 
SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -518,9 +917,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -530,11 +928,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -549,8 +946,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -559,6 +955,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; 
LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: fadds f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: fadds f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -571,16 +1025,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, 
r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -588,10 +1040,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -605,12 +1056,55 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; 
LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -622,9 +1116,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -634,9 +1127,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -646,11 +1138,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -665,8 +1156,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, 
L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -675,6 +1165,64 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f0, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lfs f1, 0(r3) +; SMALL32-O0-NEXT: fadds f1, f0, f1 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f0, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lfs f1, 0(r3) +; 
LARGE32-O0-NEXT: fadds f1, f0, f1 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll @@ -11,6 +11,18 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32-O0 @ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 @VarInit = global i32 87, align 4 @@ -23,16 +35,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: 
stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -43,8 +53,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -59,12 +68,63 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r5, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: lwz r5, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r5 +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; 
LARGE32-O0-LABEL: storeITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r5, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r5 +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -75,16 +135,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -95,8 +153,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -111,12 +168,63 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r5, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; 
SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: lwz r5, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r5 +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r5, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r5 +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -127,16 +235,14 @@ ; 
SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -147,8 +253,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -163,12 +268,63 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r5, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: stw r3, 
28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: lwz r5, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r5 +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r5, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r5 +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -179,16 +335,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -199,8 +353,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, 
r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -215,12 +368,63 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r5, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: lwz r5, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r5 +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r5, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; 
LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r5 +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -231,16 +435,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -248,10 +450,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -265,12 +466,55 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; 
SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -282,9 +526,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -295,9 +538,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; 
LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -308,11 +550,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -327,8 +568,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -337,6 +577,66 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; 
SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r4, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r4, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -349,16 +649,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -366,10 +664,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) 
@IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -383,12 +680,55 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 
0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -400,9 +740,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -413,9 +752,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -426,11 +764,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -445,8 +782,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -455,6 +791,66 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; 
SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r4, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r4, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: 
blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -467,16 +863,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -484,10 +878,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -501,12 +894,55 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; 
SMALL32-O0-LABEL: loadTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -518,9 +954,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -531,9 +966,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -544,11 +978,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # 
@VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -563,8 +996,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -573,6 +1005,66 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r4, 0(r3) +; SMALL32-O0-NEXT: lwz 
r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r4, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -585,16 +1077,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -602,10 +1092,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 
@@ -619,12 +1108,55 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -636,9 +1168,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld 
r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -649,9 +1180,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -662,11 +1192,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -681,8 +1210,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -691,6 +1219,66 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; 
LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r4, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r4, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -11,6 +11,18 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32-O0 @ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 @VarInit = global i64 87, align 8 @@ -23,16 +35,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -43,9 +53,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; 
SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -60,13 +70,69 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r6, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: mr r5, r4 +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: # kill: def $r4 killed $r5 +; SMALL32-O0-NEXT: lwz r6, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r6 +; SMALL32-O0-NEXT: stw r5, 4(r4) +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: mr r5, r4 +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; 
LARGE32-O0-NEXT: # kill: def $r4 killed $r5 +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r6, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r6 +; LARGE32-O0-NEXT: stw r5, 4(r4) +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -77,16 +143,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -97,9 +161,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -114,13 +178,69 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r6, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: 
blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: mr r5, r4 +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: # kill: def $r4 killed $r5 +; SMALL32-O0-NEXT: lwz r6, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r6 +; SMALL32-O0-NEXT: stw r5, 4(r4) +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: mr r5, r4 +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: # kill: def $r4 killed $r5 +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r6, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r6 +; LARGE32-O0-NEXT: stw r5, 4(r4) +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 
= tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -131,16 +251,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -151,9 +269,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -168,13 +286,69 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r6, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; 
LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: mr r5, r4 +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: # kill: def $r4 killed $r5 +; SMALL32-O0-NEXT: lwz r6, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r6 +; SMALL32-O0-NEXT: stw r5, 4(r4) +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: mr r5, r4 +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: # kill: def $r4 killed $r5 +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r6, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r6 +; LARGE32-O0-NEXT: stw r5, 4(r4) +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -185,16 +359,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: 
addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -205,9 +377,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -222,13 +394,69 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r6, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: mr r5, r4 +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: # kill: def $r4 killed $r5 +; SMALL32-O0-NEXT: lwz r6, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; 
SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r6 +; SMALL32-O0-NEXT: stw r5, 4(r4) +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: mr r5, r4 +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: # kill: def $r4 killed $r5 +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r6, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r6 +; LARGE32-O0-NEXT: stw r5, 4(r4) +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -239,16 +467,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -258,9 +484,9 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; 
SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -274,13 +500,59 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r4, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r4) +; SMALL32-O0-NEXT: lwz r4, 4(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r4, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 
0(r4) +; LARGE32-O0-NEXT: lwz r4, 4(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -292,9 +564,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -304,9 +575,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -318,12 +588,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -338,9 +608,9 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -351,6 +621,70 @@ ; LARGE32-NEXT: lwz 
r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r6, 4(r3) +; SMALL32-O0-NEXT: lwz r5, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r4, 4(r3) +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addc r4, r4, r6 +; SMALL32-O0-NEXT: adde r3, r3, r5 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r6, 4(r3) +; LARGE32-O0-NEXT: lwz r5, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: 
lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r4, 4(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addc r4, r4, r6 +; LARGE32-O0-NEXT: adde r3, r3, r5 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -363,16 +697,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -382,9 +714,9 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -398,13 +730,59 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) 
@IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r4, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r4) +; SMALL32-O0-NEXT: lwz r4, 4(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r4, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r4) +; LARGE32-O0-NEXT: lwz r4, 4(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -416,9 +794,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -428,9 +805,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, 
L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -442,12 +818,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -462,9 +838,9 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -475,6 +851,70 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; 
LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadITLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r6, 4(r3) +; SMALL32-O0-NEXT: lwz r5, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r4, 4(r3) +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addc r4, r4, r6 +; SMALL32-O0-NEXT: adde r3, r3, r5 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadITLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r6, 4(r3) +; LARGE32-O0-NEXT: lwz r5, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r4, 4(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addc r4, r4, r6 +; LARGE32-O0-NEXT: adde r3, r3, r5 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -487,16 +927,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: 
addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -506,9 +944,9 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -522,13 +960,59 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r4, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r4) +; SMALL32-O0-NEXT: lwz r4, 4(r4) +; 
SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r4, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r4) +; LARGE32-O0-NEXT: lwz r4, 4(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -540,9 +1024,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -552,9 +1035,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -566,12 +1048,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: 
lwz r0, 8(r1) @@ -586,9 +1068,9 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -599,6 +1081,70 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLUninit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r6, 4(r3) +; SMALL32-O0-NEXT: lwz r5, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r4, 4(r3) +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addc r4, r4, r6 +; SMALL32-O0-NEXT: adde r3, r3, r5 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; 
SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLUninit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r6, 4(r3) +; LARGE32-O0-NEXT: lwz r5, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r4, 4(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addc r4, r4, r6 +; LARGE32-O0-NEXT: adde r3, r3, r5 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -611,16 +1157,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -630,9 +1174,9 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -646,13 +1190,59 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; 
LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r4, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 0(r4) +; SMALL32-O0-NEXT: lwz r4, 4(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r4, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r4) +; LARGE32-O0-NEXT: lwz r4, 4(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ 
-664,9 +1254,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -676,9 +1265,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -690,12 +1278,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -710,9 +1298,9 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -723,6 +1311,70 @@ ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 ; LARGE32-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; 
SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadTLInit2: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r6, 4(r3) +; SMALL32-O0-NEXT: lwz r5, 0(r3) +; SMALL32-O0-NEXT: lwz r3, L..C4(r2) # @VarInit +; SMALL32-O0-NEXT: lwz r4, 4(r3) +; SMALL32-O0-NEXT: lwz r3, 0(r3) +; SMALL32-O0-NEXT: addc r4, r4, r6 +; SMALL32-O0-NEXT: adde r3, r3, r5 +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadTLInit2: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r6, 4(r3) +; LARGE32-O0-NEXT: lwz r5, 0(r3) +; LARGE32-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE32-O0-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-O0-NEXT: lwz r4, 4(r3) +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addc r4, r4, r6 +; LARGE32-O0-NEXT: adde r3, r3, r5 +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr entry: %0 = 
tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll @@ -44,28 +44,28 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x22 +; RELOC: Virtual Address: 0x12 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x2A +; RELOC: Virtual Address: 0x1A ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x80 +; RELOC: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (23) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 64 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x88 +; RELOC: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (21) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -80,7 +80,7 @@ ; SYM-NEXT: Symbols [ ; SYM: Index: 15 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -98,7 +98,7 @@ ; SYM-NEXT: } ; SYM: Index: 17 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -158,10 +158,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 15) 
IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 15) IThreadLocalVarUninit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stdx 3, 13, 4 ; DIS-NEXT: blr -; DIS: 0000000000000020 (idx: 5) .loadTLInit: +; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0 @@ -170,42 +169,41 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ldx 3, 13, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000048 (idx: 7) VarInit[RW]: -; DIS-NEXT: 48: 00 00 00 00 -; DIS-NEXT: 4c: 00 00 00 57 -; DIS: 0000000000000050 (idx: 9) storeITLUninit[DS]: +; DIS: 0000000000000030 (idx: 7) VarInit[RW]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 34: 00 00 00 57 +; DIS: 0000000000000038 (idx: 9) storeITLUninit[DS]: +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 3c: 00 00 00 00 +; DIS-NEXT: 40: 00 00 00 00 +; DIS-NEXT: 0000000000000040: R_POS (idx: 13) TOC[TC0] +; DIS-NEXT: 44: 00 00 00 68 +; DIS: 0000000000000050 (idx: 11) loadTLInit[DS]: ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 54: 00 00 00 00 +; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 54: 00 00 00 10 ; DIS-NEXT: 58: 00 00 00 00 ; DIS-NEXT: 0000000000000058: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 
5c: 00 00 00 80 -; DIS: 0000000000000068 (idx: 11) loadTLInit[DS]: +; DIS-NEXT: 5c: 00 00 00 68 +; DIS: 0000000000000068 (idx: 15) IThreadLocalVarUninit[TE]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 6c: 00 00 00 20 +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] +; DIS-NEXT: 6c: 00 00 00 00 +; DIS: 0000000000000070 (idx: 17) ThreadLocalVarInit[TE]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 80 -; DIS: 0000000000000080 (idx: 15) IThreadLocalVarUninit[TE]: -; DIS-NEXT: 80: 00 00 00 00 -; DIS-NEXT: 0000000000000080: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] -; DIS-NEXT: 84: 00 00 00 00 -; DIS: 0000000000000088 (idx: 17) ThreadLocalVarInit[TE]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 0000000000000088: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] -; DIS-NEXT: 8c: 00 00 00 00 -; DIS: 0000000000000090 (idx: 19) VarInit[TE]: -; DIS-NEXT: 90: 00 00 00 00 -; DIS-NEXT: 0000000000000090: R_POS (idx: 7) VarInit[RW] -; DIS-NEXT: 94: 00 00 00 48 +; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] +; DIS-NEXT: 74: 00 00 00 00 +; DIS: 0000000000000078 (idx: 19) VarInit[TE]: +; DIS-NEXT: 78: 00 00 00 00 +; DIS-NEXT: 0000000000000078: R_POS (idx: 7) VarInit[RW] +; DIS-NEXT: 7c: 00 00 00 30 ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 21) ThreadLocalVarInit[TL]: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll @@ -202,9 +202,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 6 -; DIS-NEXT: stw 4, 
4(3) -; DIS-NEXT: stw 5, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 7, 3, 6 +; DIS-NEXT: stwx 5, 3, 6 +; DIS-NEXT: stw 4, 4(7) ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 @@ -219,9 +219,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 5, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(5) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 5, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) VarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(5) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll @@ -44,14 +44,14 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOC (0x3) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x68 +; RELOC: Virtual Address: 0x60 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 64 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x70 +; RELOC: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (25) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -66,7 +66,7 @@ ; SYM-NEXT: Symbols [ ; SYM: Index: 17 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -84,7 +84,7 @@ ; SYM-NEXT: } ; SYM: Index: 19 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x70 
+; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -142,47 +142,45 @@ ; DIS: 0000000000000000 (idx: 3) .storeITLUninit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 17) IThreadLocalVarUninit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 3, 13, 4 ; DIS-NEXT: blr ; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) VarInit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 13, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} extsw 3, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000030 (idx: 9) VarInit: -; DIS-NEXT: 30: 00 00 00 57 -; DIS: 0000000000000038 (idx: 11) storeITLUninit[DS]: -; DIS-NEXT: 8: 00 00 00 00 -; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 3c: 00 00 00 00 -; DIS-NEXT: 40: 00 00 00 00 -; DIS-NEXT: 0000000000000040: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 44: 00 00 00 68 -; DIS: 0000000000000050 (idx: 13) loadTLInit[DS]: +; DIS: 000000000000002c (idx: 9) VarInit: +; DIS-NEXT: 2c: 00 00 00 57 +; DIS: 0000000000000030 (idx: 11) storeITLUninit[DS]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 0000000000000030: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 34: 00 00 00 00 +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 3c: 00 00 00 60 +; DIS: 0000000000000048 (idx: 13) loadTLInit[DS]: +; DIS-NEXT: 48: 00 00 00 00 +; DIS-NEXT: 
0000000000000048: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 4c: 00 00 00 10 ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 54: 00 00 00 10 -; DIS-NEXT: 58: 00 00 00 00 -; DIS-NEXT: 0000000000000058: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 68 -; DIS: 0000000000000068 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 0000000000000050: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 54: 00 00 00 60 +; DIS: 0000000000000060 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 60: 00 00 00 00 +; DIS-NEXT: 0000000000000060: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] +; DIS: 0000000000000068 (idx: 19) ThreadLocalVarInit[TC]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] -; DIS: 0000000000000070 (idx: 19) ThreadLocalVarInit[TC]: +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 25) ThreadLocalVarInit +; DIS: 0000000000000070 (idx: 21) VarInit[TC]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 25) ThreadLocalVarInit -; DIS: 0000000000000078 (idx: 21) VarInit[TC]: -; DIS-NEXT: 78: 00 00 00 00 -; DIS-NEXT: 0000000000000078: R_POS (idx: 9) VarInit +; DIS-NEXT: 0000000000000070: R_POS (idx: 9) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 25) ThreadLocalVarInit: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll @@ -51,21 +51,21 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOC (0x3) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x40 +; RELOC: Virtual Address: 0x44 ; RELOC-NEXT: Symbol: .__get_tpointer (1) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 26 ; RELOC-NEXT: Type: R_RBA (0x18) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x84 +; RELOC: Virtual Address: 0x80 ; RELOC-NEXT: Symbol: 
IThreadLocalVarUninit (29) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x88 +; RELOC: Virtual Address: 0x84 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -99,7 +99,7 @@ ; SYM-NEXT: } ; SYM: Index: 19 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x84 +; SYM-NEXT: Value (RelocatableAddress): 0x80 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -118,7 +118,7 @@ ; SYM-NEXT: } ; SYM: Index: 21 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x84 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -185,8 +185,7 @@ ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] ; DIS-NEXT: stw 0, 40(1) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 5 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 4, 3, 5 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 @@ -198,12 +197,11 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) VarInit[TC] +; DIS-NEXT: stw 0, 40(1) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: stw 0, 40(1) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 3, 4 ; DIS-NEXT: lwz 4, 0(5) -; DIS-NEXT: lwz 3, 0(3) ; DIS-NEXT: add 3, 4, 3 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) @@ -211,29 +209,29 @@ ; DIS-NEXT: blr ; DIS: Disassembly of section .data: -; DIS: 00000068 (idx: 11) VarInit: -; DIS-NEXT: 68: 00 00 00 57 -; DIS: 0000006c (idx: 13) 
storeITLUninit[DS]: -; DIS-NEXT: 6c: 00 00 00 00 -; DIS-NEXT: 0000006c: R_POS (idx: 5) .storeITLUninit -; DIS-NEXT: 70: 00 00 00 84 -; DIS-NEXT: 00000070: R_POS (idx: 17) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 00 -; DIS: 00000078 (idx: 15) loadTLInit[DS]: -; DIS-NEXT: 78: 00 00 00 30 -; DIS-NEXT: 00000078: R_POS (idx: 7) .loadTLInit -; DIS-NEXT: 7c: 00 00 00 84 -; DIS-NEXT: 0000007c: R_POS (idx: 17) TOC[TC0] +; DIS: 00000064 (idx: 11) VarInit: +; DIS-NEXT: 64: 00 00 00 57 +; DIS: 00000068 (idx: 13) storeITLUninit[DS]: +; DIS-NEXT: 68: 00 00 00 00 +; DIS-NEXT: 00000068: R_POS (idx: 5) .storeITLUninit +; DIS-NEXT: 6c: 00 00 00 80 +; DIS-NEXT: 0000006c: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 70: 00 00 00 00 +; DIS: 00000074 (idx: 15) loadTLInit[DS]: +; DIS-NEXT: 74: 00 00 00 30 +; DIS-NEXT: 00000074: R_POS (idx: 7) .loadTLInit +; DIS-NEXT: 78: 00 00 00 80 +; DIS-NEXT: 00000078: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 7c: 00 00 00 00 +; DIS: 00000080 (idx: 19) IThreadLocalVarUninit[TC]: ; DIS-NEXT: 80: 00 00 00 00 -; DIS: 00000084 (idx: 19) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 00000080: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL] +; DIS: 00000084 (idx: 21) ThreadLocalVarInit[TC]: ; DIS-NEXT: 84: 00 00 00 00 -; DIS-NEXT: 00000084: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL] -; DIS: 00000088 (idx: 21) ThreadLocalVarInit[TC]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 00000088: R_TLS_LE (idx: 27) ThreadLocalVarInit -; DIS: 0000008c (idx: 23) VarInit[TC]: -; DIS-NEXT: 8c: 00 00 00 68 -; DIS-NEXT: 0000008c: R_POS (idx: 11) VarInit +; DIS-NEXT: 00000084: R_TLS_LE (idx: 27) ThreadLocalVarInit +; DIS: 00000088 (idx: 23) VarInit[TC]: +; DIS-NEXT: 88: 00 00 00 64 +; DIS-NEXT: 00000088: R_POS (idx: 11) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 00000000 (idx: 27) ThreadLocalVarInit: diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -592,8 +592,8 @@ ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -650,8 +650,8 @@ ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -723,8 +723,8 @@ ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll --- a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll +++ b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll @@ -53,7 +53,7 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, var_short@got@tprel@ha ; CHECK-NEXT: ld 3, var_short@got@tprel@l(3) -; CHECK-NEXT: lhzx 3, 3, var_short@tls +; CHECK-NEXT: lhax 3, 3, var_short@tls ; CHECK-NEXT: blr entry: %0 = load i16, ptr @var_short, align 2, !tbaa !7 @@ -95,7 +95,7 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, var_int@got@tprel@ha ; CHECK-NEXT: ld 3, var_int@got@tprel@l(3) -; CHECK-NEXT: lwzx 3, 3, var_int@tls +; CHECK-NEXT: lwax 3, 3, var_int@tls ; CHECK-NEXT: blr entry: %0 = load i32, ptr @var_int, align 4, !tbaa !9