diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -1296,11 +1296,9 @@ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD, LDBRX, DFLOADf32, DFLOADf64, LFD, - LFDX, XFLOADf32, XFLOADf64, LFIWAX, LIWAX, LFIWZX, LIWZX, LHA, LHA8, - LHAX, LHAX8, LHBRX, LHBRX8, LHZ, LHZ8, LVEBX, @@ -1309,7 +1307,7 @@ LVX, LVXL, LWA, LWA_32, - LWAX, LWAX_32, + LWAX, LWAXTLS, LWAXTLS_32, LWAX_32, LWBRX, LWBRX8, LWZ, LWZ8, LWZtoc, LWZtocL, LXSD, @@ -1340,6 +1338,8 @@ ICBT, LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32, LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX, + LFDX, LFDXTLS, XFLOADf32, XFLOADf64, + LHAX, LHAX8, LHAXTLS, LHAXTLS_32, LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32, LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32, LXVL, @@ -1442,11 +1442,17 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], (instrs LFS, - LFSX, LXSSP, LXSSPX )>; +// 2-way crack instructions +// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands +def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C], + (instrs + LFSX, LFSXTLS +)>; + // 4-way crack instructions // 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY], @@ -1823,12 +1829,10 @@ DFSTOREf32, DFSTOREf64, STFD, STFDU, STFDUX, - STFDX, STFIWX, STIWX, STFS, STFSU, STFSUX, - STFSX, STH, STH8, STHBRX, STHU, STHU8, @@ -1867,6 +1871,8 @@ CP_COPY, CP_COPY8, STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32, SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_, + STFDX, STFDXTLS, + STFSX, STFSXTLS, STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32, STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32, STXVL, diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td --- 
a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -765,6 +765,7 @@ (instrs LFIWZX, LFDX, + LFDXTLS, LFD )>; @@ -815,9 +816,9 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs - (instregex "LHA(X)?(8)?$"), + (instregex "LHA(X)?(TLS)?(8)?(_32)?$"), (instregex "CP_PASTE(8)?_rec$"), - (instregex "LWA(X)?(_32)?$"), + (instregex "LWA(X)?(TLS)?(_32)?$"), TCHECK )>; @@ -850,6 +851,7 @@ DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs LFSX, + LFSXTLS, LFS )>; @@ -891,7 +893,7 @@ // all three dispatches for the superslice. def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C], (instrs - (instregex "STF(S|D|IWX|SX|DX)$"), + (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS)$"), (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), (instregex "STW(8)?$"), (instregex "(D|X)FSTORE(f32|f64)$"), diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -704,6 +704,35 @@ return false; } +// canOptimizeTLSDFormToXFormOnAIX - For TLS local-exec accesses on AIX 64-bit, +// an ADD_TLS node is produced to add the result of loading the variable offset +// to the thread pointer (X13). This add, followed by a D-Form memory operation, +// can be optimized to use an X-Form load or store, allowing the ADD_TLS node to +// be removed completely. This can be done as long as the memory operation's +// immediate offset is 0 and that the thread pointer is being added to. +static bool canOptimizeTLSDFormToXFormOnAIX(SelectionDAG *CurDAG, + SDValue Base, uint64_t Offset) { + const PPCSubtarget &Subtarget = + CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>(); + if (Subtarget.isAIXABI()) { + // As seen in tryTLSXForm[Load|Store], Base is an ADD_TLS node. 
The first + // operand in this instruction should be the thread pointer (X13), so this + // optimization should not be performed if the thread pointer is not being + // added to. + RegisterSDNode *AddFirstOpReg = + dyn_cast_or_null<RegisterSDNode>(Base.getOperand(0).getNode()); + if (!AddFirstOpReg) + return false; + if (AddFirstOpReg->getReg() != PPC::X13) + return false; + // The optimization to convert the D-Form load/store into its X-Form + // counterpart should only occur if the immediate offset is 0. + if (Offset != 0) + return false; + } + return true; +} + bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { SDValue Base = ST->getBasePtr(); if (Base.getOpcode() != PPCISD::ADD_TLS) @@ -713,6 +742,9 @@ return false; if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) return false; + // Check if the 64-bit local-exec sequence on AIX can be optimized. + if (!canOptimizeTLSDFormToXFormOnAIX(CurDAG, Base, ST->getSrcValueOffset())) + return false; SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); @@ -738,6 +770,14 @@ Opcode = PPC::STDXTLS; break; } + case MVT::f32: { + Opcode = PPC::STFSXTLS; + break; + } + case MVT::f64: { + Opcode = PPC::STFDXTLS; + break; + } } SDValue Chain = ST->getChain(); SDVTList VTs = ST->getVTList(); @@ -758,10 +798,14 @@ return false; if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) return false; + // Check if the 64-bit local-exec sequence on AIX can be optimized. + if (!canOptimizeTLSDFormToXFormOnAIX(CurDAG, Base, LD->getSrcValueOffset())) + return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); EVT RegVT = LD->getValueType(0); + bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; unsigned Opcode; switch (MemVT.getSimpleVT().SimpleTy) { default: @@ -771,17 +815,31 @@ break; } case MVT::i16: { - Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; + if (isSExt) + Opcode = (RegVT == MVT::i32) ? PPC::LHAXTLS_32 : PPC::LHAXTLS; + else + Opcode = (RegVT == MVT::i32) ? 
PPC::LHZXTLS_32 : PPC::LHZXTLS; break; } case MVT::i32: { - Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; + if (isSExt) + Opcode = (RegVT == MVT::i32) ? PPC::LWAXTLS_32 : PPC::LWAXTLS; + else + Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; break; } case MVT::i64: { Opcode = PPC::LDXTLS; break; } + case MVT::f32: { + Opcode = PPC::LFSXTLS; + break; + } + case MVT::f64: { + Opcode = PPC::LFDXTLS; + break; + } } SDValue Chain = LD->getChain(); SDVTList VTs = LD->getVTList(); @@ -5405,9 +5463,10 @@ } case ISD::STORE: { - // Change TLS initial-exec D-form stores to X-form stores. + // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to + // X-form stores. StoreSDNode *ST = cast<StoreSDNode>(N); - if (EnableTLSOpt && Subtarget->isELFv2ABI() && + if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; @@ -5420,8 +5479,9 @@ // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { - // Change TLS initial-exec D-form loads to X-form loads. - if (EnableTLSOpt && Subtarget->isELFv2ABI()) + // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to + // X-form loads. 
+ if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI())) if (tryTLSXFormLoad(LD)) return; break; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -724,18 +724,32 @@ "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>; def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lhax $RST, $RA, $RB", IIC_LdStLoad, []>; def LWZXTLS : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lwax $RST, $RA, $RB", IIC_LdStLoad, []>; def LDXTLS : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64; def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>; def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lhax $RST, $RA, $RB", IIC_LdStLoad, []>; def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>; +def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lwax $RST, $RA, $RB", IIC_LdStLoad, []>; } +let mayLoad = 1, Predicates = [HasFPU] in { +def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>; +def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB), + "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>; +} let mayStore = 1 in { def STBXTLS 
: XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), @@ -761,6 +775,14 @@ PPC970_DGroup_Cracked; } +let mayStore = 1, Predicates = [HasFPU] in { +def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), + "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>, + PPC970_DGroup_Cracked; +def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB), + "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>, + PPC970_DGroup_Cracked; +} let isCommutable = 1 in defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8 @VarInit = global double 8.787000e+01, align 8 @@ -17,17 +23,30 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 
0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -38,17 +57,30 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -59,17 +91,30 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # 
target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -80,17 +125,30 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; 
LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -101,17 +159,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -123,9 +194,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -135,11 +205,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: 
lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -152,17 +243,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; 
LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -174,9 +278,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -186,11 +289,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -203,17 +327,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; 
SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -225,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -237,11 +373,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; 
LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -254,17 +411,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -276,9 +446,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: 
blr ; @@ -288,11 +457,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s 
--check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4 @VarInit = global float 0x4021666660000000, align 4 @@ -17,17 +23,30 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -38,17 +57,30 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # 
target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -59,17 +91,30 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -80,17 +125,30 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; 
LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -101,17 +159,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr 
%0, align 4 @@ -123,9 +194,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -135,11 +205,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -152,17 +243,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) 
-; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -174,9 +278,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -186,11 +289,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) 
+; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -203,17 +327,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -225,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -237,11 +373,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; 
LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -254,17 +411,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, 
r13, r3 +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -276,9 +446,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -288,11 +457,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lfs f1, 0(r3) +; SMALL64-O0-NEXT: fadds f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lfs f1, 0(r3) +; LARGE64-O0-NEXT: fadds f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll @@ -5,6 +5,12 @@ ; RUN: 
llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 @VarInit = global i32 87, align 4 @@ -17,17 +23,32 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -38,17 +59,32 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # 
%entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -59,17 +95,32 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; 
SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -80,17 +131,32 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -101,17 +167,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: 
lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -123,9 +202,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -136,12 +214,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; 
SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -154,17 +255,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -176,9 +290,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, 
r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -189,12 +302,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -207,17 +343,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr +; +; 
SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -229,9 +378,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -242,12 +390,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, 
L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -260,17 +431,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -282,9 +466,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -295,12 +478,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; 
LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 
@VarInit = global i64 87, align 8 @@ -17,17 +23,30 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -38,17 +57,30 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; 
LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -59,17 +91,30 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -80,17 +125,30 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 
0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -101,17 +159,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -123,9 +194,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, 
L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -135,11 +205,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -152,17 +243,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; 
SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -174,9 +278,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -186,11 +289,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr 
@llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -203,17 +327,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -225,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -237,11 +373,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld 
r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -254,17 +411,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -276,9 +446,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: 
ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -288,11 +457,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll @@ -48,7 +48,7 @@ ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x22 +; RELOC-NEXT: Virtual Address: 0x12 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: 
FixupBitValue: 0 @@ -56,7 +56,7 @@ ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x26 +; RELOC-NEXT: Virtual Address: 0x16 ; RELOC-NEXT: Symbol: VarInit (19) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -64,7 +64,7 @@ ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x2A +; RELOC-NEXT: Virtual Address: 0x1A ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -72,7 +72,7 @@ ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x2E +; RELOC-NEXT: Virtual Address: 0x1E ; RELOC-NEXT: Symbol: VarInit (19) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -82,7 +82,7 @@ ; RELOC-NEXT: } ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x50 +; RELOC-NEXT: Virtual Address: 0x38 ; RELOC-NEXT: Symbol: .storeITLUninit (3) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -90,7 +90,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x58 +; RELOC-NEXT: Virtual Address: 0x40 ; RELOC-NEXT: Symbol: TOC (13) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -98,7 +98,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x68 +; RELOC-NEXT: Virtual Address: 0x50 ; RELOC-NEXT: Symbol: .loadTLInit (5) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -106,7 +106,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x70 +; RELOC-NEXT: Virtual Address: 0x58 ; RELOC-NEXT: Symbol: TOC (13) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -114,7 +114,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x80 
+; RELOC-NEXT: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (23) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -122,7 +122,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x88 +; RELOC-NEXT: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (21) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -130,7 +130,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x90 +; RELOC-NEXT: Virtual Address: 0x78 ; RELOC-NEXT: Symbol: VarInit (7) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -165,7 +165,7 @@ ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { ; SYM-NEXT: Index: 2 -; SYM-NEXT: SectionLen: 68 +; SYM-NEXT: SectionLen: 48 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 5 @@ -196,7 +196,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 5 ; SYM-NEXT: Name: .loadTLInit -; SYM-NEXT: Value (RelocatableAddress): 0x20 +; SYM-NEXT: Value (RelocatableAddress): 0x10 ; SYM-NEXT: Section: .text ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -215,7 +215,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 7 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x48 +; SYM-NEXT: Value (RelocatableAddress): 0x30 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -234,7 +234,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 9 ; SYM-NEXT: Name: storeITLUninit -; SYM-NEXT: Value (RelocatableAddress): 0x50 +; SYM-NEXT: Value (RelocatableAddress): 0x38 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -253,7 +253,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 11 ; SYM-NEXT: Name: loadTLInit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x50 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 
0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -272,7 +272,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 13 ; SYM-NEXT: Name: TOC -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -291,7 +291,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 15 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -310,7 +310,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 17 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -329,7 +329,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 19 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x90 +; SYM-NEXT: Value (RelocatableAddress): 0x78 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -392,10 +392,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 15) IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 15) IThreadLocalVarUninit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stdx 3, 13, 4 ; DIS-NEXT: blr -; DIS: 0000000000000020 (idx: 5) .loadTLInit: +; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0 @@ -404,42 +403,41 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) 
VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ldx 3, 13, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000048 (idx: 7) VarInit[RW]: -; DIS-NEXT: 48: 00 00 00 00 -; DIS-NEXT: 4c: 00 00 00 57 -; DIS: 0000000000000050 (idx: 9) storeITLUninit[DS]: +; DIS: 0000000000000030 (idx: 7) VarInit[RW]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 34: 00 00 00 57 +; DIS: 0000000000000038 (idx: 9) storeITLUninit[DS]: +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 3c: 00 00 00 00 +; DIS-NEXT: 40: 00 00 00 00 +; DIS-NEXT: 0000000000000040: R_POS (idx: 13) TOC[TC0] +; DIS-NEXT: 44: 00 00 00 68 +; DIS: 0000000000000050 (idx: 11) loadTLInit[DS]: ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 54: 00 00 00 00 +; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 54: 00 00 00 10 ; DIS-NEXT: 58: 00 00 00 00 ; DIS-NEXT: 0000000000000058: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 80 -; DIS: 0000000000000068 (idx: 11) loadTLInit[DS]: +; DIS-NEXT: 5c: 00 00 00 68 +; DIS: 0000000000000068 (idx: 15) IThreadLocalVarUninit[TE]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 6c: 00 00 00 20 +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] +; DIS-NEXT: 6c: 00 00 00 00 +; DIS: 0000000000000070 (idx: 17) ThreadLocalVarInit[TE]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 80 -; DIS: 0000000000000080 (idx: 15) IThreadLocalVarUninit[TE]: -; DIS-NEXT: 80: 00 00 00 00 -; DIS-NEXT: 0000000000000080: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] -; DIS-NEXT: 84: 00 00 00 00 -; DIS: 
0000000000000088 (idx: 17) ThreadLocalVarInit[TE]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 0000000000000088: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] -; DIS-NEXT: 8c: 00 00 00 00 -; DIS: 0000000000000090 (idx: 19) VarInit[TE]: -; DIS-NEXT: 90: 00 00 00 00 -; DIS-NEXT: 0000000000000090: R_POS (idx: 7) VarInit[RW] -; DIS-NEXT: 94: 00 00 00 48 +; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] +; DIS-NEXT: 74: 00 00 00 00 +; DIS: 0000000000000078 (idx: 19) VarInit[TE]: +; DIS-NEXT: 78: 00 00 00 00 +; DIS-NEXT: 0000000000000078: R_POS (idx: 7) VarInit[RW] +; DIS-NEXT: 7c: 00 00 00 30 ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 21) ThreadLocalVarInit[TL]: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll @@ -58,7 +58,7 @@ ; RELOC-NEXT: } ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x38 +; RELOC-NEXT: Virtual Address: 0x30 ; RELOC-NEXT: Symbol: .storeITLUninit (3) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -66,7 +66,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x40 +; RELOC-NEXT: Virtual Address: 0x38 ; RELOC-NEXT: Symbol: TOC (15) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -74,7 +74,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x50 +; RELOC-NEXT: Virtual Address: 0x48 ; RELOC-NEXT: Symbol: .loadTLInit (5) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -82,7 +82,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x58 +; RELOC-NEXT: Virtual Address: 0x50 ; RELOC-NEXT: Symbol: TOC (15) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ 
-90,7 +90,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x68 +; RELOC-NEXT: Virtual Address: 0x60 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -98,7 +98,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x70 +; RELOC-NEXT: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (25) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -106,7 +106,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x78 +; RELOC-NEXT: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: VarInit (9) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -141,7 +141,7 @@ ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { ; SYM-NEXT: Index: 2 -; SYM-NEXT: SectionLen: 48 +; SYM-NEXT: SectionLen: 44 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 5 @@ -191,7 +191,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 7 ; SYM-NEXT: Name: .data -; SYM-NEXT: Value (RelocatableAddress): 0x30 +; SYM-NEXT: Value (RelocatableAddress): 0x2C ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -210,7 +210,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 9 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x30 +; SYM-NEXT: Value (RelocatableAddress): 0x2C ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -229,7 +229,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 11 ; SYM-NEXT: Name: storeITLUninit -; SYM-NEXT: Value (RelocatableAddress): 0x38 +; SYM-NEXT: Value (RelocatableAddress): 0x30 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -248,7 +248,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 13 ; SYM-NEXT: Name: loadTLInit -; SYM-NEXT: 
Value (RelocatableAddress): 0x50 +; SYM-NEXT: Value (RelocatableAddress): 0x48 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -267,7 +267,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 15 ; SYM-NEXT: Name: TOC -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -286,7 +286,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 17 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -305,7 +305,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 19 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x70 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -324,7 +324,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 21 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x78 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -404,47 +404,45 @@ ; DIS: 0000000000000000 (idx: 3) .storeITLUninit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 17) IThreadLocalVarUninit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 3, 13, 4 ; DIS-NEXT: blr ; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) VarInit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 13, 
3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} extsw 3, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000030 (idx: 9) VarInit: -; DIS-NEXT: 30: 00 00 00 57 -; DIS: 0000000000000038 (idx: 11) storeITLUninit[DS]: -; DIS-NEXT: 8: 00 00 00 00 -; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 3c: 00 00 00 00 -; DIS-NEXT: 40: 00 00 00 00 -; DIS-NEXT: 0000000000000040: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 44: 00 00 00 68 -; DIS: 0000000000000050 (idx: 13) loadTLInit[DS]: +; DIS: 000000000000002c (idx: 9) VarInit: +; DIS-NEXT: 2c: 00 00 00 57 +; DIS: 0000000000000030 (idx: 11) storeITLUninit[DS]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 0000000000000030: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 34: 00 00 00 00 +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 3c: 00 00 00 60 +; DIS: 0000000000000048 (idx: 13) loadTLInit[DS]: +; DIS-NEXT: 48: 00 00 00 00 +; DIS-NEXT: 0000000000000048: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 4c: 00 00 00 10 ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 54: 00 00 00 10 -; DIS-NEXT: 58: 00 00 00 00 -; DIS-NEXT: 0000000000000058: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 68 -; DIS: 0000000000000068 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 0000000000000050: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 54: 00 00 00 60 +; DIS: 0000000000000060 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 60: 00 00 00 00 +; DIS-NEXT: 0000000000000060: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] +; DIS: 0000000000000068 (idx: 19) ThreadLocalVarInit[TC]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] -; DIS: 0000000000000070 (idx: 19) ThreadLocalVarInit[TC]: +; DIS-NEXT: 
0000000000000068: R_TLS_LE (idx: 25) ThreadLocalVarInit +; DIS: 0000000000000070 (idx: 21) VarInit[TC]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 25) ThreadLocalVarInit -; DIS: 0000000000000078 (idx: 21) VarInit[TC]: -; DIS-NEXT: 78: 00 00 00 00 -; DIS-NEXT: 0000000000000078: R_POS (idx: 9) VarInit +; DIS-NEXT: 0000000000000070: R_POS (idx: 9) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 25) ThreadLocalVarInit: diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -592,8 +592,8 @@ ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -650,8 +650,8 @@ ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -723,8 +723,8 @@ ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll --- a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll +++ b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll @@ -53,7 +53,7 @@ ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: addis 3, 2, var_short@got@tprel@ha ; CHECK-NEXT: ld 3, var_short@got@tprel@l(3) -; CHECK-NEXT: lhzx 3, 3, var_short@tls +; CHECK-NEXT: lhax 3, 3, var_short@tls ; CHECK-NEXT: blr entry: %0 = load i16, ptr @var_short, align 2, !tbaa !7 @@ -95,7 +95,7 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, var_int@got@tprel@ha ; CHECK-NEXT: ld 3, var_int@got@tprel@l(3) -; CHECK-NEXT: lwzx 3, 3, var_int@tls +; CHECK-NEXT: lwax 3, 3, var_int@tls ; CHECK-NEXT: blr entry: %0 = load i32, ptr @var_int, align 4, !tbaa !9